rdf-tabular 0.1.3.1 → 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,205 @@
1
+ -# This template is used for generating a rollup EARL report. It expects to be
2
+ -# called with a single _tests_ local with the following structure
3
+ - require 'cgi'
4
+ - require 'digest'
5
+
6
+ !!! 5
7
+ %html{:prefix => "earl: http://www.w3.org/ns/earl# doap: http://usefulinc.com/ns/doap# mf: http://www.w3.org/2001/sw/DataAccess/tests/test-manifest#"}
8
+ - subjects = tests['testSubjects']
9
+ %head
10
+ %meta{"http-equiv" => "Content-Type", :content => "text/html;charset=utf-8"}
11
+ %meta{name: "viewport", content: "width=device-width, initial-scale=1.0"}
12
+ %link{rel: "stylesheet", type: "text/css", href: "https://www.w3.org/StyleSheets/TR/base"}
13
+ %title
14
+ = tests['name']
15
+ Implementation Report
16
+ :css
17
+ span[property='dc:description'] { display: none; }
18
+ td.PASS { color: green; }
19
+ td.FAIL { color: red; }
20
+ table.report {
21
+ border-width: 1px;
22
+ border-spacing: 2px;
23
+ border-style: outset;
24
+ border-color: gray;
25
+ border-collapse: separate;
26
+ background-color: white;
27
+ }
28
+ table.report th {
29
+ border-width: 1px;
30
+ padding: 1px;
31
+ border-style: inset;
32
+ border-color: gray;
33
+ background-color: white;
34
+ -moz-border-radius: ;
35
+ }
36
+ table.report td {
37
+ border-width: 1px;
38
+ padding: 1px;
39
+ border-style: inset;
40
+ border-color: gray;
41
+ background-color: white;
42
+ -moz-border-radius: ;
43
+ }
44
+ tr.summary {font-weight: bold;}
45
+ td.passed-all {color: green;}
46
+ td.passed-most {color: darkorange;}
47
+ td.passed-some {color: red;}
48
+ td.passed-none {color: gray;}
49
+ em.rfc2119 {
50
+ text-transform: lowercase;
51
+ font-variant: small-caps;
52
+ font-style: normal;
53
+ color: #900;
54
+ }
55
+ a.testlink {
56
+ color: inherit;
57
+ text-decoration: none;
58
+ }
59
+ a.testlink:hover {
60
+ text-decoration: underline;
61
+ }
62
+ %body
63
+ - subject_refs = {}
64
+ - tests['entries'].each {|m| m['title'] ||= m['description']}
65
+ %section{:about => tests['@id'], typeof: Array(tests['@type']).join(" ")}
66
+ %h2
67
+ Ruby rdf-tabular gem test results
68
+ %p
69
+ This document reports conformance for the following specifications:
70
+ %ul
71
+ %li
72
+ %a{property: "doap:name", href: "http://www.w3.org/TR/tabular-data-model/"}="MetaModel for Tabular Data and Metadata on the Web"
73
+ %li
74
+ %a{property: "doap:name", href: "http://www.w3.org/TR/tabular-metadata/"}="Metadata Vocabulary for Tabular Data"
75
+ %li
76
+ %a{property: "doap:name", href: "http://www.w3.org/TR/csv2rdf/"}="Generating RDF from Tabular Data on the Web"
77
+ %li
78
+ %a{property: "doap:name", href: "http://www.w3.org/TR/csv2json/"}="Generating JSON from Tabular Data on the Web"
79
+ %p
80
+ This report is also available in
81
+ %a{:href => "earl.ttl"}
82
+ Turtle
83
+ %dl
84
+ - subjects.each_with_index do |subject, index|
85
+ - subject_refs[subject['@id']] = "subj_#{index}"
86
+ %dt{:id => subject_refs[subject['@id']]}
87
+ %a{:href => subject['@id']}
88
+ %span{:about => subject['@id'], property: "doap:name"}<= subject['name']
89
+ %dd{property: "earl:testSubjects", resource: subject['@id'], typeof: Array(subject['@type']).join(" "), :inlist => true}
90
+ %dl
91
+ - if subject['doapDesc']
92
+ %dt= "Description"
93
+ %dd{property: "doap:description", :lang => 'en'}<
94
+ ~ CGI.escapeHTML subject['doapDesc']
95
+ - if subject['language']
96
+ %dt= "Programming Language"
97
+ %dd{property: "doap:programming-language"}<
98
+ ~ CGI.escapeHTML subject['language']
99
+ - if subject['homepage']
100
+ %dt= "Home Page"
101
+ %dd{property: "doap:homepage"}
102
+ %a{href: subject['homepage']}
103
+ ~ CGI.escapeHTML subject['homepage']
104
+ - if subject['developer']
105
+ %dt= "Developer"
106
+ %dd{:rel => "doap:developer"}
107
+ - subject['developer'].each do |dev|
108
+ %div{resource: dev['@id'], typeof: Array(dev['@type']).join(" ")}
109
+ - if dev.has_key?('@id')
110
+ %a{:href => dev['@id']}
111
+ %span{property: "foaf:name"}<
112
+ ~ CGI.escapeHTML dev['foaf:name']
113
+ - else
114
+ %span{property: "foaf:name"}<
115
+ ~ CGI.escapeHTML dev['foaf:name']
116
+ - if dev['foaf:homepage']
117
+ %a{property: "foaf:homepage", href: dev['foaf:homepage']}
118
+ ~ CGI.escapeHTML dev['foaf:homepage']
119
+ %dt
120
+ Test Suite Compliance
121
+ %dd
122
+ %table.report
123
+ %tbody
124
+ - tests['entries'].sort_by {|m| m['title'].to_s.downcase}.each do |manifest|
125
+ - passed = manifest['entries'].select {|t| t['assertions'][index]['result']['outcome'] == 'earl:passed' }.length
126
+ - total = manifest['entries'].length
127
+ - pct = (passed * 100.0) / total
128
+ - cls = (pct == 100.0 ? 'passed-all' : (pct >= 85.0) ? 'passed-most' : (pct == 0.0 ? 'passed-none' : 'passed-some'))
129
+ %tr
130
+ %td
131
+ %a{href: "##{manifest['title']}"}
132
+ ~ manifest['title']
133
+ %td{:class => cls}
134
+ = pct == 0.0 ? "Untested" : "#{passed}/#{total} (#{'%.1f' % pct}%)"
135
+ %section
136
+ %h2
137
+ Individual Test Results
138
+ - tests['entries'].sort_by {|m| m['title'].to_s.downcase}.each do |manifest|
139
+ - test_cases = manifest['entries']
140
+ %section{id: manifest['title'], typeof: manifest['@type'].join(" "), resource: manifest['@id']}
141
+ %h2{property: "dc:title mf:name"}<=manifest['title']
142
+ - Array(manifest['description']).each do |desc|
143
+ %p{property: "rdfs:comment"}<
144
+ ~ CGI.escapeHTML desc
145
+ %table.report
146
+ - skip_subject = {}
147
+ - passed_tests = []
148
+ %tr
149
+ %th
150
+ Test
151
+ - subjects.each_with_index do |subject, index|
152
+ - subject_refs[subject['@id']] = "subj_#{index}"
153
+ -# If subject is untested for every test in this manifest, skip it
154
+ - skip_subject[subject['@id']] = manifest['entries'].all? {|t| t['assertions'][index]['result']['outcome'] == 'earl:untested'}
155
+ - unless skip_subject[subject['@id']]
156
+ %th
157
+ %a{:href => '#' + subject_refs[subject['@id']]}<=subject['name']
158
+ - test_cases.each do |test|
159
+ %tr{:rel => "mf:entries", typeof: test['@type'].join(" "), resource: test['@id'], :inlist => true}
160
+ %td
161
+ = "Test #{test['@id'].split("#").last}: #{CGI.escapeHTML test['title']}"
162
+ - test['assertions'].each_with_index do |assertion, ndx|
163
+ - next if skip_subject[assertion['subject']]
164
+ - pass_fail = assertion['result']['outcome'].split(':').last.upcase.sub(/(PASS|FAIL)ED$/, '\1')
165
+ - passed_tests[ndx] = (passed_tests[ndx] || 0) + (pass_fail == 'PASS' ? 1 : 0)
166
+ %td{:class => pass_fail, property: "earl:assertions", typeof: assertion['@type'], :inlist => true}
167
+ - if assertion['assertedBy']
168
+ %link{property: "earl:assertedBy", :href => assertion['assertedBy']}
169
+ %link{property: "earl:test", :href => assertion['test']}
170
+ %link{property: "earl:subject", :href => assertion['subject']}
171
+ - if assertion['mode']
172
+ %link{property: 'earl:mode', :href => assertion['mode']}
173
+ %span{property: "earl:result", typeof: assertion['result']['@type']}
174
+ %span{property: 'earl:outcome', resource: assertion['result']['outcome']}
175
+ = pass_fail
176
+ %tr.summary
177
+ %td
178
+ = "Percentage passed out of #{manifest['entries'].length} Tests"
179
+ - passed_tests.compact.each do |r|
180
+ - pct = (r * 100.0) / manifest['entries'].length
181
+ %td{:class => (pct == 100.0 ? 'passed-all' : (pct >= 95.0 ? 'passed-most' : 'passed-some'))}
182
+ = "#{'%.1f' % pct}%"
183
+ %section#appendix{property: "earl:generatedBy", resource: tests['generatedBy']['@id'], typeof: tests['generatedBy']['@type']}
184
+ %h2
185
+ Report Generation Software
186
+ - doap = tests['generatedBy']
187
+ - rel = doap['release']
188
+ %p
189
+ This report generated by
190
+ %span{property: "doap:name"}<
191
+ %a{:href => tests['generatedBy']['@id']}<
192
+ = doap['name']
193
+ %meta{property: "doap:shortdesc", :content => doap['shortdesc'], :lang => 'en'}
194
+ %meta{property: "doap:description", :content => doap['doapDesc'], :lang => 'en'}
195
+ version
196
+ %span{property: "doap:release", resource: rel['@id'], typeof: 'doap:Version'}
197
+ %span{property: "doap:revision"}<=rel['revision']
198
+ %meta{property: "doap:name", :content => rel['name']}
199
+ %meta{property: "doap:created", :content => rel['created'], :datatype => "xsd:date"}
200
+ an
201
+ %a{property: "doap:license", :href => doap['license']}<="Unlicensed"
202
+ %span{property: "doap:programming-language"}<="Ruby"
203
+ application. More information is available at
204
+ %a{property: "doap:homepage", :href => doap['homepage']}<=doap['homepage']
205
+ = "."
@@ -0,0 +1,4 @@
1
+ {+url}-metadata.json
2
+ csv-metadata.json
3
+ {+url}.json
4
+ csvm.json
@@ -1,9 +1,5 @@
1
1
  $:.unshift(File.expand_path("..", __FILE__))
2
2
  require 'rdf' # @see http://rubygems.org/gems/rdf
3
- begin
4
- require 'byebug' # REMOVE ME
5
- rescue LoadError
6
- end
7
3
  require 'csv'
8
4
 
9
5
  module RDF
@@ -28,6 +24,16 @@ module RDF
28
24
  autoload :Transformation, 'rdf/tabular/metadata'
29
25
  autoload :VERSION, 'rdf/tabular/version'
30
26
 
27
+ # Metadata errors detected
28
+ class Error < RDF::ReaderError; end
29
+
30
+ # Relative location of site-wide configuration file
31
+ SITE_WIDE_CONFIG = "/.well-known/csvm".freeze
32
+ SITE_WIDE_DEFAULT = %(
33
+ {+url}-metadata.json
34
+ csv-metadata.json
35
+ ).gsub(/^\s+/, '').freeze
36
+
31
37
  def self.debug?; @debug; end
32
38
  def self.debug=(value); @debug = value; end
33
39
  end
@@ -8,12 +8,13 @@ module RDF::Tabular
8
8
  # RDF::Format.for(:tsv) #=> RDF::Tabular::Format
9
9
  # RDF::Format.for("etc/foaf.csv")
10
10
  # RDF::Format.for("etc/foaf.tsv")
11
- # RDF::Format.for(:file_name => "etc/foaf.csv")
12
- # RDF::Format.for(:file_name => "etc/foaf.tsv")
13
- # RDF::Format.for(:file_extension => "csv")
14
- # RDF::Format.for(:file_extension => "tsv")
15
- # RDF::Format.for(:content_type => "text/csv")
16
- # RDF::Format.for(:content_type => "text/tab-separated-values")
11
+ # RDF::Format.for(file_name: "etc/foaf.csv")
12
+ # RDF::Format.for(file_name: "etc/foaf.tsv")
13
+ # RDF::Format.for(file_extension: "csv")
14
+ # RDF::Format.for(file_extension: "tsv")
15
+ # RDF::Format.for(content_type: "text/csv")
16
+ # RDF::Format.for(content_type: "text/tab-separated-values")
17
+ # RDF::Format.for(content_type: "application/csvm+json")
17
18
  #
18
19
  # @example Obtaining serialization format MIME types
19
20
  # RDF::Format.content_types #=> {"text/csv" => [RDF::Tabular::Format]}
@@ -25,7 +26,10 @@ module RDF::Tabular
25
26
  class Format < RDF::Format
26
27
  content_type 'text/csv',
27
28
  extensions: [:csv, :tsv],
28
- alias: 'text/tab-separated-values'
29
+ alias: %w{
30
+ text/tab-separated-values
31
+ application/csvm+json
32
+ }
29
33
  content_encoding 'utf-8'
30
34
 
31
35
  reader { RDF::Tabular::Reader }
@@ -11,8 +11,7 @@ require 'yaml' # used by BCP47, which should have required it.
11
11
  # CSVM Metadata processor
12
12
  #
13
13
  # * Extracts Metadata from file or Hash definition
14
- # * Merges multiple Metadata definitions
15
- # * Extract Metadata from a CSV file
14
+ # * Extract Embedded Metadata from a CSV file
16
15
  # * Return table-level annotations
17
16
  # * Return Column-level annotations
18
17
  # * Return row iterator with column information
@@ -45,30 +44,28 @@ module RDF::Tabular
45
44
  valueUrl: :uri_template,
46
45
  }.freeze
47
46
  INHERITED_DEFAULTS = {
48
- aboutUrl: "".freeze,
49
47
  default: "".freeze,
50
48
  lang: "und",
51
49
  null: "".freeze,
52
50
  ordered: false,
53
- propertyUrl: "".freeze,
54
51
  required: false,
55
52
  textDirection: "ltr".freeze,
56
- valueUrl: "".freeze,
57
53
  }.freeze
58
54
 
59
55
  # Valid datatypes
60
56
  DATATYPES = {
61
- anyAtomicType: RDF::XSD.anySimpleType,
57
+ anyAtomicType: RDF::XSD.anyAtomicType,
62
58
  anyURI: RDF::XSD.anyURI,
63
59
  base64Binary: RDF::XSD.basee65Binary,
64
60
  boolean: RDF::XSD.boolean,
65
61
  byte: RDF::XSD.byte,
66
62
  date: RDF::XSD.date,
67
63
  dateTime: RDF::XSD.dateTime,
68
- dateTimeDuration: RDF::XSD.dateTimeDuration,
64
+ dayTimeDuration: RDF::XSD.dayTimeDuration,
69
65
  dateTimeStamp: RDF::XSD.dateTimeStamp,
70
66
  decimal: RDF::XSD.decimal,
71
67
  double: RDF::XSD.double,
68
+ duration: RDF::XSD.duration,
72
69
  float: RDF::XSD.float,
73
70
  ENTITY: RDF::XSD.ENTITY,
74
71
  gDay: RDF::XSD.gDay,
@@ -84,6 +81,7 @@ module RDF::Tabular
84
81
  Name: RDF::XSD.Name,
85
82
  NCName: RDF::XSD.NCName,
86
83
  negativeInteger: RDF::XSD.negativeInteger,
84
+ NMTOKEN: RDF::XSD.NMTOKEN,
87
85
  nonNegativeInteger: RDF::XSD.nonNegativeInteger,
88
86
  nonPositiveInteger: RDF::XSD.nonPositiveInteger,
89
87
  normalizedString: RDF::XSD.normalizedString,
@@ -100,7 +98,7 @@ module RDF::Tabular
100
98
  unsignedShort: RDF::XSD.unsignedShort,
101
99
  yearMonthDuration: RDF::XSD.yearMonthDuration,
102
100
 
103
- any: RDF::XSD.anySimpleType,
101
+ any: RDF::XSD.anyAtomicType,
104
102
  binary: RDF::XSD.base64Binary,
105
103
  datetime: RDF::XSD.dateTime,
106
104
  html: RDF.HTML,
@@ -115,7 +113,7 @@ module RDF::Tabular
115
113
 
116
114
  # Local version of the context
117
115
  # @return [JSON::LD::Context]
118
- LOCAL_CONTEXT = ::JSON::LD::Context.new.parse(File.expand_path("../../../../etc/csvw.jsonld", __FILE__))
116
+ LOCAL_CONTEXT = ::JSON::LD::Context.new.parse(File.expand_path("../../../../etc/csvw.jsonld", __FILE__)).freeze
119
117
 
120
118
  # ID of this Metadata
121
119
  # @return [RDF::URI]
@@ -139,7 +137,9 @@ module RDF::Tabular
139
137
  #
140
138
  # @param [String] path
141
139
  # @param [Hash{Symbol => Object}] options
142
- # see `RDF::Util::File.open_file` in RDF.rb
140
+ # see `RDF::Util::File.open_file` in RDF.rb and {#new}
141
+ # @yield [Metadata]
142
+ # @raise [IOError] if file not found
143
143
  def self.open(path, options = {})
144
144
  options = options.merge(
145
145
  headers: {
@@ -152,8 +152,25 @@ module RDF::Tabular
152
152
  end
153
153
  end
154
154
 
155
+ # Return the well-known configuration for a file, and remember using a weak-reference cache to avoid unnecessary retrievals.
156
+ # @param [String] base, the URL used for finding the file
157
+ # @return [Array<String>, false]
158
+ def self.site_wide_config(base)
159
+ require 'rdf/util/cache' unless defined?(::RDF::Util::Cache)
160
+ @cache ||= RDF::Util::Cache.new(-1)
161
+
162
+ config_loc = RDF::URI(base).join(SITE_WIDE_CONFIG).to_s
163
+ # Only load if we haven't tried before. Use `SITE_WIDE_DEFAULT` if not found
164
+ if @cache[config_loc].nil?
165
+ @cache[config_loc] = RDF::Util::File.open_file(config_loc) do |rd|
166
+ rd.each_line.to_a
167
+ end rescue SITE_WIDE_DEFAULT.split
168
+ end
169
+ @cache[config_loc]
170
+ end
171
+
155
172
  ##
156
- # Return metadata for a file, based on user-specified and path-relative locations from an input file
173
+ # Return metadata for a file, based on user-specified, linked, and site-wide location configuration from an input file
157
174
  # @param [IO, StringIO] input
158
175
  # @param [Hash{Symbol => Object}] options
159
176
  # @option options [Metadata, Hash, String, RDF::URI] :metadata user supplied metadata, merged on top of extracted metadata. If provided as a URL, Metadata is loaded from that location
@@ -175,22 +192,46 @@ module RDF::Tabular
175
192
  # Search for metadata until found
176
193
 
177
194
  # load link metadata, if available
178
- locs = []
179
- if input.respond_to?(:links) &&
195
+ all_locs = []
196
+ if !metadata && input.respond_to?(:links) &&
180
197
  link = input.links.find_link(%w(rel describedby))
181
- locs << RDF::URI(base).join(link.href)
198
+ link_loc = RDF::URI(base).join(link.href).to_s
199
+ md = Metadata.open(link_loc, options.merge(filenames: link_loc, reason: "load linked metadata: #{link_loc}"))
200
+ all_locs << link_loc if md
201
+ # Metadata must describe file to be useful
202
+ metadata = md if md && md.describes_file?(base)
182
203
  end
183
204
 
184
- if base
185
- locs += [RDF::URI("#{base}-metadata.json"), RDF::URI(base).join("metadata.json")]
205
+ locs = []
206
+ # If we still don't have metadata, load the site-wide configuration file and use templates found there as locations
207
+ if !metadata && base
208
+ templates = site_wide_config(base)
209
+ debug("for_input", options) {"templates: #{templates.map(&:to_s).inspect}"}
210
+ locs = templates.map do |template|
211
+ t = Addressable::Template.new(template)
212
+ RDF::URI(base).join(t.expand(url: base).to_s)
213
+ end
214
+ debug("for_input", options) {"locs: #{locs.map(&:to_s).inspect}"}
215
+
216
+ locs.each do |loc|
217
+ metadata ||= begin
218
+ md = Metadata.open(loc, options.merge(filenames: loc, reason: "load found metadata: #{loc}"))
219
+ # Metadata must describe file to be useful
220
+ all_locs << loc if md
221
+ md if md && md.describes_file?(base)
222
+ rescue IOError
223
+ debug("for_input", options) {"failed to load found metadata #{loc}: #{$!}"}
224
+ nil
225
+ end
226
+ end
186
227
  end
187
228
 
188
- locs.each do |loc|
189
- metadata ||= begin
190
- Metadata.open(loc, options.merge(filenames: loc, reason: "load found metadata: #{loc}"))
191
- rescue
192
- debug("for_input", options) {"failed to load found metadata #{loc}: #{$!}"}
193
- nil
229
+ # If Metadata was found, but no metadata describes the file, issue a warning
230
+ if !all_locs.empty? && !metadata
231
+ warnings = options.fetch(:warnings, [])
232
+ warnings << "Found metadata at #{all_locs.join(",")}, which does not describe #{base}, ignoring"
233
+ if options[:validate] && !options[:warnings]
234
+ $stderr.puts "Warnings: #{warnings.join("\n")}"
194
235
  end
195
236
  end
196
237
 
@@ -198,7 +239,7 @@ module RDF::Tabular
198
239
  metadata = case
199
240
  when metadata then metadata
200
241
  when base then TableGroup.new({"@context" => "http://www.w3.org/ns/csvw", tables: [{url: base}]}, options)
201
- else TableGroup.new({"@context" => "http://www.w3.org/ns/csvw", tables: []}, options)
242
+ else TableGroup.new({"@context" => "http://www.w3.org/ns/csvw", tables: [{url: nil}]}, options)
202
243
  end
203
244
 
204
245
  # Make TableGroup, if not already
@@ -217,6 +258,8 @@ module RDF::Tabular
217
258
  else ::JSON.parse(input.to_s)
218
259
  end
219
260
 
261
+ raise ::JSON::ParserError unless object.is_a?(Hash)
262
+
220
263
  unless options[:parent]
221
264
  # Add context, if not set (which it should be)
222
265
  object['@context'] ||= options.delete(:@context) || options[:context]
@@ -237,7 +280,7 @@ module RDF::Tabular
237
280
  when %w(tables).any? {|k| object_keys.include?(k)} then :TableGroup
238
281
  when %w(dialect tableSchema transformations).any? {|k| object_keys.include?(k)} then :Table
239
282
  when %w(targetFormat scriptFormat source).any? {|k| object_keys.include?(k)} then :Transformation
240
- when %w(columns primaryKey foreignKeys).any? {|k| object_keys.include?(k)} then :Schema
283
+ when %w(columns primaryKey foreignKeys rowTitles).any? {|k| object_keys.include?(k)} then :Schema
241
284
  when %w(name virtual).any? {|k| object_keys.include?(k)} then :Column
242
285
  when %w(commentPrefix delimiter doubleQuote encoding header headerRowCount).any? {|k| object_keys.include?(k)} then :Dialect
243
286
  when %w(lineTerminators quoteChar skipBlankRows skipColumns skipInitialSpace skipRows trim).any? {|k| object_keys.include?(k)} then :Dialect
@@ -251,13 +294,15 @@ module RDF::Tabular
251
294
  when :Column then RDF::Tabular::Column
252
295
  when :Dialect then RDF::Tabular::Dialect
253
296
  else
254
- raise Error, "Unkown metadata type: #{type.inspect}"
297
+ raise Error, "Unknown metadata type: #{type.inspect}"
255
298
  end
256
299
  end
257
300
 
258
301
  md = klass.allocate
259
302
  md.send(:initialize, object, options)
260
303
  md
304
+ rescue ::JSON::ParserError
305
+ raise Error, "Expected input to be a JSON Object"
261
306
  end
262
307
 
263
308
  ##
@@ -271,6 +316,8 @@ module RDF::Tabular
271
316
  # Context used for this metadata. Taken from input if not provided
272
317
  # @option options [RDF::URI] :base
273
318
  # The Base URL to use when expanding the document. This overrides the value of `input` if it is a URL. If not specified and `input` is not an URL, the base URL defaults to the current document URL if in a browser context, or the empty string if there is no document context.
319
+ # @option options [Boolean] :normalize normalize the object
320
+ # @option options [Boolean] :validate Strict metadata validation
274
321
  # @raise [Error]
275
322
  # @return [Metadata]
276
323
  def initialize(input, options = {})
@@ -285,15 +332,15 @@ module RDF::Tabular
285
332
  @context = case input['@context']
286
333
  when Array
287
334
  warn "Context missing required value 'http://www.w3.org/ns/csvw'" unless input['@context'].include?('http://www.w3.org/ns/csvw')
288
- LOCAL_CONTEXT.parse(input['@context'].detect {|e| e.is_a?(Hash)} || {})
335
+ LOCAL_CONTEXT.dup.parse(input['@context'].detect {|e| e.is_a?(Hash)} || {})
289
336
  when Hash
290
337
  warn "Context missing required value 'http://www.w3.org/ns/csvw'" unless input['@context'].include?('http://www.w3.org/ns/csvw')
291
- LOCAL_CONTEXT.parse(input['@context'])
292
- when "http://www.w3.org/ns/csvw" then LOCAL_CONTEXT
338
+ LOCAL_CONTEXT.dup.parse(input['@context'])
339
+ when "http://www.w3.org/ns/csvw" then LOCAL_CONTEXT.dup
293
340
  else
294
341
  if self.is_a?(TableGroup) || self.is_a?(Table) && !@parent
295
342
  warn "Context missing required value 'http://www.w3.org/ns/csvw'"
296
- LOCAL_CONTEXT
343
+ LOCAL_CONTEXT.dup
297
344
  end
298
345
  end
299
346
 
@@ -326,17 +373,17 @@ module RDF::Tabular
326
373
  when :url
327
374
  # URL of CSV relative to metadata
328
375
  object[:url] = value
329
- @url = base.join(value)
330
- @context.base = @url if @context # Use as base for expanding IRIs
376
+ @url = @options[:base].join(value)
377
+ @options[:base] = @url if @context # Use as base for expanding IRIs
331
378
  when :@id
332
379
  # metadata identifier
333
380
  object[:@id] = if value.is_a?(String)
334
381
  value
335
382
  else
336
383
  warn "#{type} has invalid property '@id' (#{value.inspect}): expected a string"
337
- ""
384
+ "" # Default value
338
385
  end
339
- @id = base.join(object[:@id])
386
+ @id = @options[:base].join(object[:@id])
340
387
  else
341
388
  if @properties.has_key?(key) || INHERITED_PROPERTIES.has_key?(key)
342
389
  self.send("#{key}=".to_sym, value)
@@ -348,7 +395,15 @@ module RDF::Tabular
348
395
  end
349
396
 
350
397
  # Set type from @type, if present and not otherwise defined
351
- @type ||= object[:@type].to_sym if object[:@type]
398
+ @type = object[:@type].to_sym if object[:@type]
399
+
400
+ if options[:normalize]
401
+ # If normalizing, also remove remaining @context
402
+ self.normalize!
403
+ @context = nil
404
+ object.delete(:@context)
405
+ end
406
+
352
407
  if reason
353
408
  debug("md#initialize") {reason}
354
409
  debug("md#initialize") {"filenames: #{filenames}"}
@@ -356,8 +411,14 @@ module RDF::Tabular
356
411
  end
357
412
  end
358
413
 
359
- # Setters
414
+ # Getters and Setters
360
415
  INHERITED_PROPERTIES.keys.each do |key|
416
+ define_method(key) do
417
+ object.fetch(key) do
418
+ parent ? parent.send(key) : default_value(key)
419
+ end
420
+ end
421
+
361
422
  define_method("#{key}=".to_sym) do |value|
362
423
  invalid = case key
363
424
  when :aboutUrl, :default, :propertyUrl, :valueUrl
@@ -370,7 +431,7 @@ module RDF::Tabular
370
431
  when :ordered, :required
371
432
  "boolean" unless value.is_a?(TrueClass) || value.is_a?(FalseClass)
372
433
  when :separator
373
- "single character" unless value.nil? || value.is_a?(String) && value.length == 1
434
+ "string or null" unless value.nil? || value.is_a?(String)
374
435
  when :textDirection
375
436
  "rtl or ltr" unless %(rtl ltr).include?(value)
376
437
  when :datatype
@@ -379,7 +440,7 @@ module RDF::Tabular
379
440
 
380
441
  if invalid
381
442
  warn "#{type} has invalid property '#{key}' (#{value.inspect}): expected #{invalid}"
382
- object[key] = default_value(key) unless default_value(key).nil?
443
+ object.delete(key)
383
444
  else
384
445
  object[key] = value
385
446
  end
@@ -399,18 +460,19 @@ module RDF::Tabular
399
460
  # An object property that provides a schema description as described in section 3.8 Schemas, for all the tables in the group. This may be provided as an embedded object within the JSON metadata or as a URL reference to a separate JSON schema document
400
461
  # when loading a remote schema, assign @id from its location if not already set
401
462
  def tableSchema=(value)
402
- case value
463
+ object[:tableSchema] = case value
403
464
  when String
404
- link = base.join(value).to_s
405
- s = Schema.open(link, @options.merge(parent: self, context: nil))
406
- s[:@id] ||= link
407
- object[:tableSchema] = s
465
+ link = context.base.join(value).to_s
466
+ md = Schema.open(link, @options.merge(parent: self, context: nil, normalize: true))
467
+ md[:@id] ||= link
468
+ md
408
469
  when Hash
409
- object[:tableSchema] = Metadata.new(value, @options.merge(parent: self, context: nil))
470
+ Schema.new(value, @options.merge(parent: self, context: nil))
410
471
  when Schema
411
- object[:tableSchema] = value
472
+ value
412
473
  else
413
474
  warn "#{type} has invalid property 'tableSchema' (#{value.inspect}): expected a URL or object"
475
+ Schema.new({}, @options.merge(parent: self, context: nil))
414
476
  end
415
477
  end
416
478
 
@@ -445,13 +507,16 @@ module RDF::Tabular
445
507
  end
446
508
 
447
509
  # If provided, dialect provides hints to processors about how to parse the referenced file to create a tabular data model.
448
- @dialect = case value
510
+ @dialect = object[:dialect] = case value
449
511
  when String
450
- object[:dialect] = Metadata.open(base.join(value), @options.merge(parent: self, context: nil))
512
+ link = context.base.join(value).to_s
513
+ md = Metadata.open(link, @options.merge(parent: self, context: nil, normalize: true))
514
+ md[:@id] ||= link
515
+ md
451
516
  when Hash
452
- object[:dialect] = Metadata.new(value, @options.merge(parent: self, context: nil))
517
+ Dialect.new(value, @options.merge(parent: self, context: nil))
453
518
  when Dialect
454
- object[:dialect] = value
519
+ value
455
520
  else
456
521
  warn "#{type} has invalid property 'dialect' (#{value.inspect}): expected a URL or object"
457
522
  nil
@@ -460,16 +525,18 @@ module RDF::Tabular
460
525
 
461
526
  # Set new datatype
462
527
  # @return [Dialect]
528
+ # @raise [Error] if datatype is not valid
463
529
  def datatype=(value)
464
530
  val = case value
465
531
  when Hash then Datatype.new(value, parent: self)
466
532
  else Datatype.new({base: value}, parent: self)
467
533
  end
468
534
 
469
- if val.valid?
535
+ if val.valid? || value.is_a?(Hash)
536
+ # Set it if it was specified as an object, which may cause validation errors later
470
537
  object[:datatype] = val
471
538
  else
472
- warn "#{type} has invalid property 'datatype': expected a Datatype"
539
+ warn "#{type} has invalid property 'datatype': expected a built-in or an object"
473
540
  end
474
541
  end
475
542
 
@@ -538,7 +605,7 @@ module RDF::Tabular
538
605
  value = object[key]
539
606
  case key
540
607
  when :base
541
- warn "#{type} has invalid base '#{key}': #{value.inspect}" unless DATATYPES.keys.map(&:to_s).include?(value) || RDF::URI(value).absolute?
608
+ errors << "#{type} has invalid base: #{value.inspect}" unless DATATYPES.keys.map(&:to_s).include?(value)
542
609
  when :columns
543
610
  value.each do |v|
544
611
  begin
@@ -549,14 +616,20 @@ module RDF::Tabular
549
616
  end
550
617
  column_names = value.map(&:name)
551
618
  errors << "#{type} has invalid property '#{key}': must have unique names: #{column_names.inspect}" unless column_names.uniq == column_names
552
- when :dialect, :tables, :tableSchema, :transformations
619
+ when :datatype, :dialect, :tables, :tableSchema, :transformations
553
620
  Array(value).each do |t|
554
- begin
555
- t.validate!
556
- rescue Error => e
557
- errors << e.message
621
+ # Make sure value is of appropriate class
622
+ if t.is_a?({datatype: Datatype, dialect: Dialect, tables: Table, tableSchema: Schema, transformations: Transformation}[key])
623
+ begin
624
+ t.validate!
625
+ rescue Error => e
626
+ errors << e.message
627
+ end
628
+ else
629
+ errors << "#{type} has invalid property '#{key}': unexpected value #{value.class.name}"
558
630
  end
559
631
  end
632
+ errors << "#{type} has invalid property 'tables': must not be empty" if key == :tables && Array(value).empty?
560
633
  when :foreignKeys
561
634
  # An array of foreign key definitions that define how the values from specified columns within this table link to rows within this table or other tables. A foreign key definition is a JSON object with the properties:
562
635
  value.each do |fk|
@@ -577,13 +650,13 @@ module RDF::Tabular
577
650
  errors << "#{type} has invalid property '#{key}': reference has a schemaReference: #{reference.inspect}"
578
651
  end
579
652
  # resource is the URL of a Table in the TableGroup
580
- ref = base.join(reference['resource']).to_s
653
+ ref = context.base.join(reference['resource']).to_s
581
654
  table = root.is_a?(TableGroup) && root.tables.detect {|t| t.url == ref}
582
655
  errors << "#{type} has invalid property '#{key}': table referenced by #{ref} not found" unless table
583
656
  table.tableSchema if table
584
657
  elsif reference.has_key?('schemaReference')
585
658
  # resource is the @id of a Schema in the TableGroup
586
- ref = base.join(reference['schemaReference']).to_s
659
+ ref = context.base.join(reference['schemaReference']).to_s
587
660
  tables = root.is_a?(TableGroup) ? root.tables.select {|t| t.tableSchema[:@id] == ref} : []
588
661
  case tables.length
589
662
  when 0
@@ -608,6 +681,114 @@ module RDF::Tabular
608
681
  errors << "#{type} has invalid property '#{key}': reference must be an object #{reference.inspect}"
609
682
  end
610
683
  end
684
+ when :format
685
+ case value
686
+ when Hash
687
+ # Object form only appropriate for numeric type
688
+ unless %w(
689
+ decimal integer long int short byte double float number
690
+ nonNegativeInteger positiveInteger nonPositiveInteger negativeInteger
691
+ unsignedLong unsignedInt unsignedShort unsignedByte
692
+ ).include?(self.base)
693
+ warn "#{type} has invalid property '#{key}': Object form only allowed on string or binary datatypes"
694
+ object.delete(:format) # act as if not set
695
+ end
696
+
697
+ # Otherwise, if it exists, its a UAX35 number pattern
698
+ begin
699
+ parse_uax35_number(value["pattern"], nil, value.fetch('groupChar', ','), value.fetch('decimalChar', '.'))
700
+ rescue ArgumentError => e
701
+ warn "#{type} has invalid property '#{key}' pattern: #{e.message}"
702
+ object[:format].delete("pattern") # act as if not set
703
+ end
704
+ else
705
+ case self.base
706
+ when 'boolean'
707
+ unless value.split("|").length == 2
708
+ warn "#{type} has invalid property '#{key}': annotation provides the true and false values expected, separated by '|'"
709
+ object.delete(:format) # act as if not set
710
+ end
711
+ when :decimal, :integer, :long, :int, :short, :byte,
712
+ :nonNegativeInteger, :positiveInteger,
713
+ :unsignedLong, :unsignedInt, :unsignedShort, :unsignedByte,
714
+ :nonPositiveInteger, :negativeInteger,
715
+ :double, :float, :number
716
+ begin
717
+ parse_uax35_number(value, nil)
718
+ rescue ArgumentError => e
719
+ warn "#{type} has invalid property '#{key}': #{e.message}"
720
+ object.delete(:format) # act as if not set
721
+ end
722
+ when 'date', 'dateTime', 'datetime', 'dateTimeStamp', 'time'
723
+ # Parse and validate format
724
+ begin
725
+ parse_uax35_date(value, nil)
726
+ rescue ArgumentError => e
727
+ warn "#{type} has invalid property '#{key}': #{e.message}"
728
+ object.delete(:format) # act as if not set
729
+ end
730
+ else
731
+ # Otherwise, if it exists, its a regular expression
732
+ begin
733
+ Regexp.compile(value)
734
+ rescue
735
+ warn "#{type} has invalid property '#{key}': #{$!.message}"
736
+ object.delete(:format) # act as if not set
737
+ end
738
+ end
739
+ end
740
+ when :length, :minLength, :maxLength
741
+ # Applications must raise an error if both length and minLength are specified and length is less than minLength.
742
+ # Similarly, applications must raise an error if both length and maxLength are specified and length is greater than maxLength.
743
+ if object[:length]
744
+ case key
745
+ when :minLength
746
+ errors << "#{type} has invalid property minLength': both length and minLength requires length be greater than or equal to minLength" if object[:length] < value
747
+ when :maxLength
748
+ errors << "#{type} has invalid property maxLength': both length and maxLength requires length be less than or equal to maxLength" if object[:length] > value
749
+ end
750
+ end
751
+
752
+ # Applications must raise an error if minLength and maxLength are both specified and minLength is greater than maxLength.
753
+ if key == :maxLength && object[:minLength]
754
+ errors << "#{type} has invalid property #{key}': both minLength and maxLength requires minLength be less than or equal to maxLength" if object[:minLength] > value
755
+ end
756
+
757
+ # Applications must raise an error if length, maxLength, or minLength are specified and the base datatype is not string or one of its subtypes, or a binary type.
758
+ unless %w(string normalizedString token language Name NMTOKEN hexBinary base64Binary binary).include?(self.base)
759
+ errors << "#{type} has invalid property '#{key}': only allowed on string or binary datatypes"
760
+ end
761
+ when :minimum, :maximum, :minInclusive, :maxInclusive, :minExclusive, :maxExclusive
762
+ case self.base
763
+ when 'decimal', 'integer', 'long', 'int', 'short', 'byte', 'double', 'number', 'float',
764
+ 'nonNegativeInteger', 'positiveInteger', 'unsignedLong', 'unsignedInt', 'unsignedShort', 'unsignedByte',
765
+ 'nonPositiveInteger', 'negativeInteger', 'date', 'dateTime', 'datetime', 'dateTimeStamp', 'time',
766
+ 'duration', 'dayTimeDuration', 'yearMonthDuration'
767
+ errors << "#{type} has invalid property '#{key}': #{value.to_ntriples} is not a valid #{self.base}" unless value.valid?
768
+
769
+ case key
770
+ when :minInclusive
771
+ # Applications MUST raise an error if both minInclusive and minExclusive are specified
772
+ errors << "#{type} cannot specify both minInclusive and minExclusive" if self.minExclusive
773
+
774
+ # Applications MUST raise an error if both minInclusive and maxInclusive are specified and maxInclusive is less than minInclusive
775
+ errors << "#{type} maxInclusive < minInclusive" if self.maxInclusive && self.maxInclusive < value
776
+
777
+ # Applications MUST raise an error if both minInclusive and maxExclusive are specified and maxExclusive is less than or equal to minInclusive
778
+ errors << "#{type} maxExclusive <= minInclusive" if self.maxExclusive && self.maxExclusive <= value
779
+ when :maxInclusive
780
+ # Applications MUST raise an error if both maxInclusive and maxExclusive are specified
781
+ errors << "#{type} cannot specify both maInclusive and maxExclusive" if self.maxExclusive
782
+ when :minExclusive
783
+ # Applications MUST raise an error if both minExclusive and maxExclusive are specified and maxExclusive is less than minExclusive
784
+ errors << "#{type} minExclusive < maxExclusive" if self.maxExclusive && self.maxExclusive < value
785
+
786
+ # Applications MUST raise an error if both minExclusive and maxInclusive are specified and maxInclusive is less than or equal to minExclusive
787
+ errors << "#{type} maxInclusive < minExclusive" if self.maxInclusive && self.maxInclusive <= value
788
+ end
789
+ else
790
+ errors << "#{type} has invalid property '#{key}': only allowed on numeric, date/time or duration datatypes"
791
+ end
611
792
  when :notes
612
793
  unless value.is_a?(Hash) || value.is_a?(Array)
613
794
  errors << "#{type} has invalid property '#{key}': #{value}, Object or Array"
@@ -617,7 +798,7 @@ module RDF::Tabular
617
798
  rescue Error => e
618
799
  errors << "#{type} has invalid content '#{key}': #{e.message}"
619
800
  end
620
- when :primaryKey
801
+ when :primaryKey, :rowTitles
621
802
  # A column reference property that holds either a single reference to a column description object or an array of references.
622
803
  "#{type} has invalid property '#{key}': no column references found" unless Array(value).length > 0
623
804
  Array(value).each do |k|
@@ -628,9 +809,18 @@ module RDF::Tabular
628
809
  when :@id
629
810
  # Must not be a BNode
630
811
  if value.to_s.start_with?("_:")
631
- errors << "#{type} has invalid property '#{key}': #{value.inspect}, must not start with '_:"
812
+ errors << "#{type} has invalid property '#{key}': #{value.inspect}, must not start with '_:'"
813
+ end
814
+
815
+ # Datatype @id MUST NOT be the URL of a built-in type
816
+ if self.is_a?(Datatype) && DATATYPES.values.include?(value)
817
+ errors << "#{type} has invalid property '#{key}': #{value.inspect}, must not be the URL of a built-in datatype"
632
818
  end
633
819
  when :@type
820
+ # Must not be a BNode
821
+ if value.to_s.start_with?("_:")
822
+ errors << "#{type} has invalid property '#{key}': #{value.inspect}, must not start with '_:'"
823
+ end
634
824
  unless value.to_sym == type
635
825
  errors << "#{type} has invalid property '#{key}': #{value.inspect}, expected #{type}"
636
826
  end
@@ -690,7 +880,9 @@ module RDF::Tabular
690
880
  next
691
881
  end
692
882
  number += 1
693
- yield(Row.new(data, self, number, number + skipped))
883
+ row = Row.new(data, self, number, number + skipped, @options)
884
+ (self.object[:rows] ||= []) << row if @options[:validate] # Keep track of rows when validating
885
+ yield(row)
694
886
  end
695
887
  end
696
888
 
@@ -775,18 +967,39 @@ module RDF::Tabular
775
967
  object.keys.any? {|k| k.to_s.include?(':')}
776
968
  end
777
969
 
# Does this metadata describe the file (URL)?
#
# A TableGroup describes the file when any of its tables has a matching
# `url`; any other metadata object describes it when its own `url` matches.
#
# @param [RDF::URI, String] url
# @return [Boolean]
def describes_file?(url)
  return self.url == url unless is_a?(TableGroup)

  # `tables` is nil when the property was never set; treat that as "no tables"
  # instead of raising NoMethodError.
  Array(tables).any? { |t| t.url == url }
end
981
+
778
982
  # Verify that the metadata we're using is compatible with embedded metadata
779
983
  # @param [Table] other
780
984
  # @raise [Error] if not compatible
781
985
  def verify_compatible!(other)
782
986
  if self.is_a?(TableGroup)
783
987
  unless tables.any? {|t| t.url == other.url && t.verify_compatible!(other)}
784
- raise Error, "TableGroups must have Table with matching url #{tables.map(&:url).inspect} vs #{other.url.inspect}"
988
+ if @options[:validate]
989
+ raise Error, "TableGroups must have Table with matching url #{tables.map(&:url).inspect} vs #{other.url.inspect}"
990
+ else
991
+ warn "TableGroups must have Table with matching url #{tables.map(&:url).inspect} vs #{other.url.inspect}"
992
+ end
785
993
  end
786
994
  else
787
995
  # Tables must have the same url
788
- raise Error, "Tables must have the same url: #{url.inspect} vs #{other.url.inspect}}" unless
789
- url == other.url
996
+ unless url == other.url
997
+ if @options[:validate]
998
+ raise Error, "Tables must have the same url: #{url.inspect} vs #{other.url.inspect}}"
999
+ else
1000
+ warn "Tables must have the same url: #{url.inspect} vs #{other.url.inspect}}"
1001
+ end
1002
+ end
790
1003
 
791
1004
  # Each column description within B MUST match the corresponding column description in A for non-virtual columns
792
1005
  non_virtual_columns = Array(tableSchema.columns).reject(&:virtual)
@@ -798,23 +1011,41 @@ module RDF::Tabular
798
1011
  index = 0
799
1012
  object_columns.all? do |cb|
800
1013
  ca = non_virtual_columns[index]
801
- va = ([ca[:name]] + case ca[:titles]
802
- when String then [ca[:titles]]
803
- when Array then ca[:titles]
804
- when Hash then ca[:titles].values.flatten
805
- else []
806
- end).compact.map(&:downcase)
807
-
808
- vb = ([cb[:name]] + case cb[:titles]
809
- when String then [cb[:titles]]
810
- when Array then cb[:titles]
811
- when Hash then cb[:titles].values.flatten
812
- else []
813
- end).compact.map(&:downcase)
814
-
815
- # If there's a non-empty case-insensitive intersection between the name and titles values for the column description at the same index within A and B, the column description in B is compatible with the matching column description in A
816
- raise Error, "Columns don't match: va: #{va}, vb: #{vb}" if (va & vb).empty?
817
- debug("merge!: columns") {"index: #{index}, va: #{va}, vb: #{vb}"}
1014
+ ta = ca.titles || {}
1015
+ tb = cb.titles || {}
1016
+ if !ca.object.has_key?(:name) && !cb.object.has_key?(:name) && ta.empty? && tb.empty?
1017
+ true
1018
+ elsif ca.object.has_key?(:name) && cb.object.has_key?(:name)
1019
+ raise Error, "Columns don't match: ca: #{ca.inspect}, cb: #{cb.inspect}" unless ca.name == cb.name
1020
+ elsif @options[:validate] || !ta.empty? && !tb.empty?
1021
+ # If validating, column compatibility requires strict match between titles
1022
+ titles_match = case
1023
+ when Array(ta['und']).any? {|t| tb.values.flatten.compact.include?(t)}
1024
+ true
1025
+ when Array(tb['und']).any? {|t| ta.values.flatten.compact.include?(t)}
1026
+ true
1027
+ when ta.any? {|lang, values| !(Array(tb[lang]) & Array(values)).empty?}
1028
+ # Match on title and language
1029
+ true
1030
+ else
1031
+ # Match if a language from ta is a prefix of a language from tb with matching titles
1032
+ ta.any? do |la, values|
1033
+ tb.keys.any? do |lb|
1034
+ (la.start_with?(lb) || lb.start_with?(la)) && !(Array(tb[lb]) & Array(values)).empty?
1035
+ end
1036
+ end
1037
+ end
1038
+
1039
+ if titles_match
1040
+ true
1041
+ elsif !@options[:validate]
1042
+ # If not validating, columns don't match, but processing continues
1043
+ warn "Columns don't match: ca: #{ca.inspect}, cb: #{cb.inspect}"
1044
+ true
1045
+ else
1046
+ raise Error, "Columns don't match: ca: #{ca.inspect}, cb: #{cb.inspect}"
1047
+ end
1048
+ end
818
1049
  index += 1
819
1050
  end
820
1051
  end
@@ -822,7 +1053,7 @@ module RDF::Tabular
822
1053
  end
823
1054
 
# Debugging representation: the class name followed by the inspected
# annotated form (`to_atd`) when this object responds to it, otherwise by
# the inspected underlying `object`.
#
# @return [String]
def inspect
  shown = respond_to?(:to_atd) ? to_atd : object
  self.class.name + shown.inspect
end
827
1058
 
828
1059
  # Proxy to @object
@@ -830,7 +1061,7 @@ module RDF::Tabular
# Store `value` under `key` in the wrapped `object`.
def []=(key, value)
  object[key] = value
end

# Iterate over the wrapped `object`, forwarding the given block.
def each(&block)
  object.each(&block)
end

# Compare the wrapped `object` with `other`.
#
# A Hash is compared directly; anything that responds to `object` is
# unwrapped first; any other value is compared as-is.
#
# @param [Object] other
# @return [Boolean]
def ==(other)
  unwrap = !other.is_a?(Hash) && other.respond_to?(:object)
  object == (unwrap ? other.object : other)
end

# Serialize the wrapped `object` as JSON, passing `args` through.
def to_json(args = nil)
  object.to_json(args)
end
836
1067
 
@@ -845,8 +1076,6 @@ module RDF::Tabular
845
1076
  normalize_jsonld(key, value)
846
1077
  when ->(k) {key.to_s == '@context'}
847
1078
  "http://www.w3.org/ns/csvw"
848
- when :link
849
- base.join(value).to_s
850
1079
  when :array
851
1080
  value = [value] unless value.is_a?(Array)
852
1081
  value.map do |v|
@@ -854,13 +1083,15 @@ module RDF::Tabular
854
1083
  v.normalize!
855
1084
  elsif v.is_a?(Hash) && (ref = v["reference"]).is_a?(Hash)
856
1085
  # SPEC SUGGESTION: special case for foreignKeys
857
- ref["resource"] = base.join(ref["resource"]).to_s if ref["resource"]
858
- ref["schemaReference"] = base.join(ref["schemaReference"]).to_s if ref["schemaReference"]
1086
+ ref["resource"] = context.base.join(ref["resource"]).to_s if ref["resource"]
1087
+ ref["schemaReference"] = context.base.join(ref["schemaReference"]).to_s if ref["schemaReference"]
859
1088
  v
860
1089
  else
861
1090
  v
862
1091
  end
863
1092
  end
1093
+ when :link
1094
+ context.base.join(value).to_s
864
1095
  when :object
865
1096
  case value
866
1097
  when Metadata then value.normalize!
@@ -872,6 +1103,14 @@ module RDF::Tabular
872
1103
  end
873
1104
  when :natural_language
874
1105
  value.is_a?(Hash) ? value : {(context.default_language || 'und') => Array(value)}
1106
+ when :atomic
1107
+ case key
1108
+ when :minimum, :maximum, :minInclusive, :maxInclusive, :minExclusive, :maxExclusive
1109
+ # Convert to a typed literal based on `base`. This will be validated later
1110
+ RDF::Literal(value, datatype: DATATYPES[self.base.to_sym])
1111
+ else
1112
+ value
1113
+ end
875
1114
  else
876
1115
  value
877
1116
  end
@@ -901,10 +1140,10 @@ module RDF::Tabular
901
1140
  raise Error, "Value object may not contain keys other than @value, @type, or @language: #{value.to_json}"
902
1141
  elsif (value.keys.sort & %w(@language @type)) == %w(@language @type)
903
1142
  raise Error, "Value object may not contain both @type and @language: #{value.to_json}"
904
- elsif value['@language'] && !BCP47::Language.identify(value['@language'])
905
- warn "Value object with @language must use valid language: #{value.to_json}" if @warnings
1143
+ elsif value['@language'] && !BCP47::Language.identify(value['@language'].to_s)
1144
+ warn "Value object with @language must use valid language: #{value.to_json}"
906
1145
  value.delete('@language')
907
- elsif value['@type'] && !context.expand_iri(value['@type'], vocab: true).absolute?
1146
+ elsif value['@type'] && (value['@type'].start_with?('_:') || !context.expand_iri(value['@type'], vocab: true).absolute?)
908
1147
  raise Error, "Value object with @type must defined type: #{value.to_json}"
909
1148
  end
910
1149
  value
@@ -919,7 +1158,7 @@ module RDF::Tabular
919
1158
  Array(v).each do |vv|
920
1159
  # Validate that all type values transform to absolute IRIs
921
1160
  resource = context.expand_iri(vv, vocab: true)
922
- raise Error, "Invalid type #{vv} in JSON-LD context" unless resource.uri? && resource.absolute?
1161
+ raise Error, "Invalid type #{vv} in JSON-LD context" unless resource.is_a?(RDF::URI) && resource.absolute?
923
1162
  end
924
1163
  nv[k] = v
925
1164
  when /^(@|_:)/
@@ -981,13 +1220,6 @@ module RDF::Tabular
981
1220
  end
982
1221
  end
983
1222
 
984
- def inherited_property_value(method)
985
- # Inherited properties
986
- object.fetch(method.to_sym) do
987
- parent.send(method) if parent
988
- end
989
- end
990
-
# Look up the default for `prop`.
#
# The receiver class's DEFAULTS are merged with INHERITED_DEFAULTS, so an
# entry in INHERITED_DEFAULTS wins when both define the same key.
#
# @param [Symbol] prop
# @return [Object, nil] the default, or nil when neither table has the key
def default_value(prop)
  class_defaults = self.class.const_get(:DEFAULTS)
  class_defaults.merge(INHERITED_DEFAULTS)[prop]
end
@@ -1033,24 +1265,30 @@ module RDF::Tabular
1033
1265
  transformations: :array,
1034
1266
  }.freeze
1035
1267
  DEFAULTS = {
1036
- tableDirection: "default".freeze,
1268
+ tableDirection: "auto".freeze,
1037
1269
  }.freeze
1038
1270
  REQUIRED = [:tables].freeze
1039
1271
 
1040
- # Setters
1272
+ # Getters and Setters
1041
1273
  PROPERTIES.each do |key, type|
1042
- next if [:tables, :tableSchema, :dialect, :transformations].include?(key)
1274
+ next if [:dialect].include?(key)
1275
+
1276
+ define_method(key) do
1277
+ object.fetch(key, DEFAULTS[key])
1278
+ end
1279
+
1280
+ next if [:tables, :tableSchema, :transformations].include?(key)
1043
1281
  define_method("#{key}=".to_sym) do |value|
1044
1282
  invalid = case key
1045
1283
  when :tableDirection
1046
- "rtl, ltr, or default" unless %(rtl ltr default).include?(value)
1284
+ "rtl, ltr, or auto" unless %(rtl ltr auto).include?(value)
1047
1285
  when :notes, :tables, :tableSchema, :dialect, :transformations
1048
1286
  # We handle this through a separate setters
1049
1287
  end
1050
1288
 
1051
1289
  if invalid
1052
1290
  warn "#{type} has invalid property '#{key}' (#{value.inspect}): expected #{invalid}"
1053
- object[key] = default_value(key) unless default_value(key).nil?
1291
+ object.delete(key)
1054
1292
  else
1055
1293
  object[key] = value
1056
1294
  end
@@ -1063,15 +1301,6 @@ module RDF::Tabular
1063
1301
  super || tables.any? {|t| t.has_annotations? }
1064
1302
  end
1065
1303
 
1066
- # Logic for accessing elements as accessors
1067
- def method_missing(method, *args)
1068
- if INHERITED_PROPERTIES.has_key?(method.to_sym)
1069
- inherited_property_value(method.to_sym)
1070
- else
1071
- PROPERTIES.has_key?(method.to_sym) ? object[method.to_sym] : super
1072
- end
1073
- end
1074
-
1075
1304
  ##
1076
1305
  # Iterate over all tables
1077
1306
  # @yield [Table]
@@ -1102,7 +1331,7 @@ module RDF::Tabular
1102
1331
  object.inject({
1103
1332
  "@id" => (id.to_s if id),
1104
1333
  "@type" => "AnnotatedTableGroup",
1105
- "tables" => []
1334
+ "tables" => Array(self.tables).map(&:to_atd)
1106
1335
  }) do |memo, (k, v)|
1107
1336
  memo[k.to_s] ||= v
1108
1337
  memo
@@ -1124,32 +1353,37 @@ module RDF::Tabular
1124
1353
  }.freeze
1125
1354
  DEFAULTS = {
1126
1355
  suppressOutput: false,
1127
- tableDirection: "default".freeze,
1356
+ tableDirection: "auto".freeze,
1128
1357
  }.freeze
1129
1358
  REQUIRED = [:url].freeze
1130
1359
 
1131
- # Setters
1360
+ # Getters and Setters
1132
1361
  PROPERTIES.each do |key, type|
1133
- next if [:tableSchema, :dialect, :transformations].include?(key)
1362
+ next if [:dialect, :url].include?(key)
1363
+ define_method(key) do
1364
+ object.fetch(key, DEFAULTS[key])
1365
+ end
1366
+
1367
+ next if [:tableSchema, :transformations].include?(key)
1134
1368
  define_method("#{key}=".to_sym) do |value|
1135
1369
  invalid = case key
1136
1370
  when :suppressOutput
1137
1371
  "boolean true or false" unless value.is_a?(TrueClass) || value.is_a?(FalseClass)
1138
1372
  when :tableDirection
1139
- "rtl, ltr, or default" unless %(rtl ltr default).include?(value)
1373
+ "rtl, ltr, or auto" unless %(rtl ltr auto).include?(value)
1140
1374
  when :url
1141
- "valid URL" unless value.is_a?(String) && base.join(value).valid?
1375
+ "valid URL" unless value.is_a?(String) && context.base.join(value).valid?
1142
1376
  when :notes, :tableSchema, :dialect, :transformations
1143
1377
  # We handle this through a separate setters
1144
1378
  end
1145
1379
 
1146
1380
  if invalid
1147
1381
  warn "#{type} has invalid property '#{key}' (#{value.inspect}): expected #{invalid}"
1148
- object[key] = default_value(key) unless default_value(key).nil?
1382
+ object.delete(key)
1149
1383
  elsif key == :url
1150
1384
  # URL of CSV relative to metadata
1151
1385
  object[:url] = value
1152
- @url = base.join(value)
1386
+ @url = context.base.join(value)
1153
1387
  @context.base = @url if @context # Use as base for expanding IRIs
1154
1388
  else
1155
1389
  object[key] = value
@@ -1180,22 +1414,13 @@ module RDF::Tabular
1180
1414
  "@id" => (id.to_s if id),
1181
1415
  "@type" => "AnnotatedTable",
1182
1416
  "url" => self.url.to_s,
1183
- "columns" => tableSchema.columns.map(&:to_atd),
1417
+ "columns" => Array(tableSchema ? tableSchema.columns : []).map(&:to_atd),
1184
1418
  "rows" => []
1185
1419
  }) do |memo, (k, v)|
1186
1420
  memo[k.to_s] ||= v
1187
1421
  memo
1188
1422
  end.delete_if {|k,v| v.nil? || v.is_a?(Metadata) || k.to_s == "@context"}
1189
1423
  end
1190
-
1191
- # Logic for accessing elements as accessors
1192
- def method_missing(method, *args)
1193
- if INHERITED_PROPERTIES.has_key?(method.to_sym)
1194
- inherited_property_value(method.to_sym)
1195
- else
1196
- PROPERTIES.has_key?(method.to_sym) ? object[method.to_sym] : super
1197
- end
1198
- end
1199
1424
  end
1200
1425
 
1201
1426
  class Schema < Metadata
@@ -1205,21 +1430,26 @@ module RDF::Tabular
1205
1430
  columns: :array,
1206
1431
  foreignKeys: :array,
1207
1432
  primaryKey: :column_reference,
1433
+ rowTitles: :column_reference,
1208
1434
  }.freeze
1209
1435
  DEFAULTS = {}.freeze
1210
1436
  REQUIRED = [].freeze
1211
1437
 
1212
- # Setters
1438
+ # Getters and Setters
1213
1439
  PROPERTIES.each do |key, type|
1440
+ define_method(key) do
1441
+ object.fetch(key, DEFAULTS[key])
1442
+ end
1443
+
1214
1444
  define_method("#{key}=".to_sym) do |value|
1215
1445
  invalid = case key
1216
- when :primaryKey
1446
+ when :primaryKey, :rowTitles
1217
1447
  "string or array of strings" unless !value.is_a?(Hash) && Array(value).all? {|v| v.is_a?(String)}
1218
1448
  end
1219
1449
 
1220
1450
  if invalid
1221
1451
  warn "#{type} has invalid property '#{key}' (#{value.inspect}): expected #{invalid}"
1222
- object[key] = default_value(key) unless default_value(key).nil?
1452
+ object.delete(key)
1223
1453
  else
1224
1454
  object[key] = value
1225
1455
  end
@@ -1270,12 +1500,21 @@ module RDF::Tabular
1270
1500
  end
1271
1501
  end
1272
1502
 
##
# List of foreign keys referencing the specified table
#
# A foreign key matches when its `reference.resource`, resolved against
# `context.base`, equals the table's `url`, or when its
# `reference.schemaReference`, resolved the same way, equals the `id` of the
# table's schema. Foreign keys whose `reference` is not an object, or which
# carry neither property, never match.
#
# @param [Table] table
# @return [Array<Hash>]
def foreign_keys_referencing(table)
  Array(foreignKeys).select do |fk|
    reference = fk.is_a?(Hash) ? fk['reference'] : nil
    # Malformed definitions are reported by validation; here they simply do not match.
    next false unless reference.is_a?(Hash)

    if reference['resource']
      table.url == context.base.join(reference['resource']).to_s
    elsif reference['schemaReference']
      schema = table.tableSchema
      # A table without a schema cannot be the target of a schemaReference.
      !schema.nil? && schema.id == context.base.join(reference['schemaReference']).to_s
    else
      false
    end
  end
end
1281
1520
  end
@@ -1321,8 +1560,12 @@ module RDF::Tabular
1321
1560
  super || columns.any? {|c| c.has_annotations? }
1322
1561
  end
1323
1562
 
1324
- # Setters
1563
+ # Getters and Setters
1325
1564
  PROPERTIES.each do |key, t|
1565
+ define_method(key) do
1566
+ object.fetch(key, DEFAULTS[key])
1567
+ end
1568
+
1326
1569
  define_method("#{key}=".to_sym) do |value|
1327
1570
  invalid = case key
1328
1571
  when :name
@@ -1339,7 +1582,7 @@ module RDF::Tabular
1339
1582
  object.delete(key) if object[key].nil?
1340
1583
  elsif invalid
1341
1584
  warn "#{type} has invalid property '#{key}' (#{value.inspect}): expected #{invalid}"
1342
- object[key] = default_value(key) unless default_value(key).nil?
1585
+ object.delete(key)
1343
1586
  else
1344
1587
  object[key] = value
1345
1588
  end
@@ -1360,7 +1603,7 @@ module RDF::Tabular
# @return [String] the table's URL (an empty RDF::URI when there is no
#   table) rendered as a string, followed by a `#col=<sourceNumber>` fragment
def id
  base_url =
    if table
      table.url
    else
      RDF::URI("")
    end
  base_url.to_s + "#col=#{self.sourceNumber}"
end
1365
1608
 
1366
1609
  # Return Annotated Column representation
@@ -1380,15 +1623,6 @@ module RDF::Tabular
1380
1623
  memo
1381
1624
  end.delete_if {|k,v| v.nil?}
1382
1625
  end
1383
-
1384
- # Logic for accessing elements as accessors
1385
- def method_missing(method, *args)
1386
- if INHERITED_PROPERTIES.has_key?(method.to_sym)
1387
- inherited_property_value(method.to_sym)
1388
- else
1389
- PROPERTIES.has_key?(method.to_sym) ? object[method.to_sym] : super
1390
- end
1391
- end
1392
1626
  end
1393
1627
 
1394
1628
  class Transformation < Metadata
@@ -1404,8 +1638,13 @@ module RDF::Tabular
1404
1638
  DEFAULTS = {}.freeze
1405
1639
  REQUIRED = %w(url targetFormat scriptFormat).map(&:to_sym).freeze
1406
1640
 
1407
- # Setters
1641
+ # Getters and Setters
1408
1642
  PROPERTIES.each do |key, type|
1643
+ next if [:url].include?(key)
1644
+ define_method(key) do
1645
+ object.fetch(key, DEFAULTS[key])
1646
+ end
1647
+
1409
1648
  define_method("#{key}=".to_sym) do |value|
1410
1649
  invalid = case key
1411
1650
  when :scriptFormat, :targetFormat
@@ -1416,17 +1655,12 @@ module RDF::Tabular
1416
1655
 
1417
1656
  if invalid
1418
1657
  warn "#{type} has invalid property '#{key}' (#{value.inspect}): expected #{invalid}"
1419
- object[key] = default_value(key) unless default_value(key).nil?
1658
+ object.delete(key)
1420
1659
  else
1421
1660
  object[key] = value
1422
1661
  end
1423
1662
  end
1424
1663
  end
1425
-
1426
- # Logic for accessing elements as accessors
1427
- def method_missing(method, *args)
1428
- PROPERTIES.has_key?(method.to_sym) ? object[method.to_sym] : super
1429
- end
1430
1664
  end
1431
1665
 
1432
1666
  class Dialect < Metadata
@@ -1444,7 +1678,7 @@ module RDF::Tabular
1444
1678
  skipColumns: 0,
1445
1679
  skipInitialSpace: false,
1446
1680
  skipRows: 0,
1447
- trim: false
1681
+ trim: true
1448
1682
  }.freeze
1449
1683
 
1450
1684
  PROPERTIES = {
@@ -1467,13 +1701,15 @@ module RDF::Tabular
1467
1701
 
1468
1702
  REQUIRED = [].freeze
1469
1703
 
1470
- # Setters
1704
+ # Getters and Setters
1471
1705
  PROPERTIES.keys.each do |key|
1706
+ define_method(key) do
1707
+ object.fetch(key, DEFAULTS[key])
1708
+ end
1709
+
1472
1710
  define_method("#{key}=".to_sym) do |value|
1473
1711
  invalid = case key
1474
- when :commentPrefix, :delimiter, :quoteChar
1475
- "a single character string" unless value.is_a?(String) && value.length == 1
1476
- when :lineTerminators
1712
+ when :commentPrefix, :delimiter, :quoteChar, :lineTerminators
1477
1713
  "a string" unless value.is_a?(String)
1478
1714
  when :doubleQuote, :header, :skipInitialSpace, :skipBlankRows
1479
1715
  "boolean true or false" unless value.is_a?(TrueClass) || value.is_a?(FalseClass)
@@ -1493,7 +1729,7 @@ module RDF::Tabular
1493
1729
  object.delete(key) if object[key].nil?
1494
1730
  elsif invalid
1495
1731
  warn "#{type} has invalid property '#{key}' (#{value.inspect}): expected #{invalid}"
1496
- object[key] = default_value(key) unless default_value(key).nil?
1732
+ object.delete(key)
1497
1733
  else
1498
1734
  object[key] = value
1499
1735
  end
@@ -1515,7 +1751,7 @@ module RDF::Tabular
# Value of `trim`; when it has not been set, the fallback is derived from
# skipInitialSpace: 'start' if that is truthy, otherwise true.
# @return [Boolean, String]
def trim
  fallback = self.skipInitialSpace ? 'start' : true
  object.fetch(:trim, fallback)
end
1520
1756
 
1521
1757
  ##
@@ -1546,7 +1782,8 @@ module RDF::Tabular
1546
1782
  }
1547
1783
  }
1548
1784
  metadata ||= table # In case the embedded metadata becomes the final metadata
1549
- metadata["lang"] = options[:lang] if options[:lang]
1785
+ lang = metadata["lang"] = options[:lang] if options[:lang]
1786
+ lang ||= 'und'
1550
1787
 
1551
1788
  # Set encoding on input
1552
1789
  csv = ::CSV.new(input, csv_options)
@@ -1575,9 +1812,9 @@ module RDF::Tabular
1575
1812
  # Initialize titles
1576
1813
  columns = table["tableSchema"]["columns"] ||= []
1577
1814
  column = columns[index - skipCols] ||= {
1578
- "titles" => {"und" => []},
1815
+ "titles" => {lang => []},
1579
1816
  }
1580
- column["titles"]["und"] << value
1817
+ column["titles"][lang] << value
1581
1818
  end
1582
1819
  end
1583
1820
  debug("embedded_metadata") {"table: #{table.inspect}"}
@@ -1585,20 +1822,12 @@ module RDF::Tabular
1585
1822
 
1586
1823
  Table.new(table, options.merge(reason: "load embedded metadata: #{table['@id']}"))
1587
1824
  end
1588
-
1589
- # Logic for accessing elements as accessors
1590
- def method_missing(method, *args)
1591
- if DEFAULTS.has_key?(method.to_sym)
1592
- # As set, or with default
1593
- object.fetch(method.to_sym, DEFAULTS[method.to_sym])
1594
- else
1595
- super
1596
- end
1597
- end
1598
1825
  end
1599
1826
 
1600
1827
  class Datatype < Metadata
1601
1828
  PROPERTIES = {
1829
+ :@id => :link,
1830
+ :@type => :atomic,
1602
1831
  base: :atomic,
1603
1832
  format: :atomic,
1604
1833
  length: :atomic,
@@ -1612,50 +1841,248 @@ module RDF::Tabular
1612
1841
  maxExclusive: :atomic,
1613
1842
  }.freeze
1614
1843
  REQUIRED = [].freeze
1615
- DEFAULTS = {}.freeze
1844
+ DEFAULTS = {
1845
+ base: "string"
1846
+ }.freeze
1616
1847
 
1617
1848
  # Override `base` in Metadata
1618
1849
  def base; object[:base]; end
1619
1850
 
1620
- # Setters
1851
+ # Getters and Setters
1621
1852
  PROPERTIES.each do |key, type|
1853
+ define_method(key) do
1854
+ object.fetch(key, DEFAULTS[key])
1855
+ end
1856
+
1622
1857
  define_method("#{key}=".to_sym) do |value|
1623
1858
  invalid = case key
1859
+ when :base
1860
+ "built-in datatype" unless DATATYPES.keys.map(&:to_s).include?(value)
1624
1861
  when :minimum, :maximum, :minInclusive, :maxInclusive, :minExclusive, :maxExclusive
1625
1862
  "numeric or valid date/time" unless value.is_a?(Numeric) ||
1626
1863
  RDF::Literal::Date.new(value.to_s).valid? ||
1627
1864
  RDF::Literal::Time.new(value.to_s).valid? ||
1628
1865
  RDF::Literal::DateTime.new(value.to_s).valid?
1629
1866
  when :format
1630
- unless value.is_a?(String)
1631
- warn "#{type} has invalid property '#{key}': #{value.inspect}, expected a string"
1632
- if default_value(key).nil?
1633
- object.delete(key)
1634
- else
1635
- object[key] = default_value(key)
1867
+ case value
1868
+ when String
1869
+ nil
1870
+ when Hash
1871
+ unless (value.keys.map(&:to_s) - %w(groupChar decimalChar pattern)).empty?
1872
+ "an object containing only groupChar, decimalChar, and/or pattern"
1636
1873
  end
1874
+ else
1875
+ "a string or object"
1637
1876
  end
1638
1877
  when :length, :minLength, :maxLength
1639
1878
  if !(value.is_a?(Numeric) && value.integer? && value >= 0)
1640
1879
  "a non-negative integer"
1641
- elsif key != :length && object[:length] && value != object[:length]
1642
- # Applications must raise an error if length, maxLength or minLength are specified and the cell value is not a list (ie separator is not specified), a string or one of its subtypes, or a binary value.
1643
- "both length and #{key} requires they be equal"
1644
1880
  end
1645
1881
  end
1646
1882
 
1647
1883
  if invalid
1648
- warn "#{type} has invalid property '#{key}' (#{value.inspect}): expected #{invalid}"
1649
- object[key] = default_value(key) unless default_value(key).nil?
1884
+ warn "#{self.type} has invalid property '#{key}' (#{value.inspect}): expected #{invalid}"
1885
+ object.delete(key)
1650
1886
  else
1651
1887
  object[key] = value
1652
1888
  end
1653
1889
  end
1654
1890
  end
1655
1891
 
1656
- # Logic for accessing elements as accessors
1657
- def method_missing(method, *args)
1658
- PROPERTIES.has_key?(method.to_sym) ? object[method.to_sym] : super
1892
+ ##
1893
+ # Parse the date format (if provided), and match against the value (if provided)
1894
+ # Otherwise, validate format and raise an error
1895
+ #
1896
+ # @param [String] format
1897
+ # @param [String] value
1898
+ # @return [String, nil] XMLSchema version of value, or nil if value does not match
1899
+ # @raise [ArgumentError] if format is not valid
1900
+ def parse_uax35_date(format, value)
1901
+ tz, date_format, time_format = nil, nil, nil
1902
+ return value unless format
1903
+ value ||= ""
1904
+
1905
+ # Extract tz info
1906
+ if md = format.match(/^(.*[dyms])+(\s*[xX]{1,5})$/)
1907
+ format, tz = md[1], md[2]
1908
+ end
1909
+
1910
+ date_format, time_format = format.split(' ')
1911
+ date_format, time_format = nil, date_format if self.base.to_sym == :time
1912
+
1913
+ # Extract date, if specified
1914
+ date_part = case date_format
1915
+ when 'yyyy-MM-dd' then value.match(/^(?<yr>\d{4})-(?<mo>\d{2})-(?<da>\d{2})/)
1916
+ when 'yyyyMMdd' then value.match(/^(?<yr>\d{4})(?<mo>\d{2})(?<da>\d{2})/)
1917
+ when 'dd-MM-yyyy' then value.match(/^(?<da>\d{2})-(?<mo>\d{2})-(?<yr>\d{4})/)
1918
+ when 'd-M-yyyy' then value.match(/^(?<da>\d{1,2})-(?<mo>\d{1,2})-(?<yr>\d{4})/)
1919
+ when 'MM-dd-yyyy' then value.match(/^(?<mo>\d{2})-(?<da>\d{2})-(?<yr>\d{4})/)
1920
+ when 'M-d-yyyy' then value.match(/^(?<mo>\d{1,2})-(?<da>\d{1,2})-(?<yr>\d{4})/)
1921
+ when 'dd/MM/yyyy' then value.match(/^(?<da>\d{2})\/(?<mo>\d{2})\/(?<yr>\d{4})/)
1922
+ when 'd/M/yyyy' then value.match(/^(?<da>\d{1,2})\/(?<mo>\d{1,2})\/(?<yr>\d{4})/)
1923
+ when 'MM/dd/yyyy' then value.match(/^(?<mo>\d{2})\/(?<da>\d{2})\/(?<yr>\d{4})/)
1924
+ when 'M/d/yyyy' then value.match(/^(?<mo>\d{1,2})\/(?<da>\d{1,2})\/(?<yr>\d{4})/)
1925
+ when 'dd.MM.yyyy' then value.match(/^(?<da>\d{2})\.(?<mo>\d{2})\.(?<yr>\d{4})/)
1926
+ when 'd.M.yyyy' then value.match(/^(?<da>\d{1,2})\.(?<mo>\d{1,2})\.(?<yr>\d{4})/)
1927
+ when 'MM.dd.yyyy' then value.match(/^(?<mo>\d{2})\.(?<da>\d{2})\.(?<yr>\d{4})/)
1928
+ when 'M.d.yyyy' then value.match(/^(?<mo>\d{1,2})\.(?<da>\d{1,2})\.(?<yr>\d{4})/)
1929
+ when 'yyyy-MM-ddTHH:mm' then value.match(/^(?<yr>\d{4})-(?<mo>\d{2})-(?<da>\d{2})T(?<hr>\d{2}):(?<mi>\d{2})(?<se>(?<ms>))/)
1930
+ when 'yyyy-MM-ddTHH:mm:ss' then value.match(/^(?<yr>\d{4})-(?<mo>\d{2})-(?<da>\d{2})T(?<hr>\d{2}):(?<mi>\d{2}):(?<se>\d{2})(?<ms>)/)
1931
+ when /yyyy-MM-ddTHH:mm:ss\.S+/
1932
+ md = value.match(/^(?<yr>\d{4})-(?<mo>\d{2})-(?<da>\d{2})T(?<hr>\d{2}):(?<mi>\d{2}):(?<se>\d{2})\.(?<ms>\d+)/)
1933
+ num_ms = date_format.match(/S+/).to_s.length
1934
+ md if md && md[:ms].length <= num_ms
1935
+ else
1936
+ raise ArgumentError, "unrecognized date/time format #{date_format}" if date_format
1937
+ nil
1938
+ end
1939
+
1940
+ # Forward past date part
1941
+ if date_part
1942
+ value = value[date_part.to_s.length..-1]
1943
+ value = value.lstrip if date_part && value.start_with?(' ')
1944
+ end
1945
+
1946
+ # Extract time, if specified
1947
+ time_part = case time_format
1948
+ when 'HH:mm:ss' then value.match(/^(?<hr>\d{2}):(?<mi>\d{2}):(?<se>\d{2})(?<ms>)/)
1949
+ when 'HHmmss' then value.match(/^(?<hr>\d{2})(?<mi>\d{2})(?<se>\d{2})(?<ms>)/)
1950
+ when 'HH:mm' then value.match(/^(?<hr>\d{2}):(?<mi>\d{2})(?<se>)(?<ms>)/)
1951
+ when 'HHmm' then value.match(/^(?<hr>\d{2})(?<mi>\d{2})(?<se>)(?<ms>)/)
1952
+ when /HH:mm:ss\.S+/
1953
+ md = value.match(/^(?<hr>\d{2}):(?<mi>\d{2}):(?<se>\d{2})\.(?<ms>\d+)/)
1954
+ num_ms = time_format.match(/S+/).to_s.length
1955
+ md if md && md[:ms].length <= num_ms
1956
+ else
1957
+ raise ArgumentError, "unrecognized date/time format #{time_format}" if time_format
1958
+ nil
1959
+ end
1960
+
1961
+ # If there's a date_format but no date_part, match fails
1962
+ return nil if date_format && date_part.nil?
1963
+
1964
+ # If there's a time_format but no time_part, match fails
1965
+ return nil if time_format && time_part.nil?
1966
+
1967
+ # Forward past time part
1968
+ value = value[time_part.to_s.length..-1] if time_part
1969
+
1970
+ # Use datetime match for time
1971
+ time_part = date_part if date_part && date_part.names.include?("hr")
1972
+
1973
+ # If there's a timezone, it may optionally start with whitespace
1974
+ value = value.lstrip if tz.to_s.start_with?(' ')
1975
+ tz_part = value if tz
1976
+
1977
+ # Compose normalized value
1978
+ vd = ("%04d-%02d-%02d" % [date_part[:yr].to_i, date_part[:mo].to_i, date_part[:da].to_i]) if date_part
1979
+ vt = ("%02d:%02d:%02d" % [time_part[:hr].to_i, time_part[:mi].to_i, time_part[:se].to_i]) if time_part
1980
+
1981
+ # Add milliseconds, if matched
1982
+ vt += ".#{time_part[:ms]}" if time_part && !time_part[:ms].empty?
1983
+
1984
+ value = [vd, vt].compact.join('T')
1985
+ value += tz_part.to_s
1986
+ end
1987
+
1988
+ ##
1989
+ # Parse the number pattern (if provided), and match against the value (if provided)
1990
+ # Otherwise, validate format and raise an error
1991
+ #
1992
+ # @param [String] pattern
1993
+ # @param [String] value
1994
+ # @param [String] groupChar
1995
+ # @param [String] decimalChar
1996
+ # @return [String] XMLSchema version of value or nil, if value does not match
1997
+ # @raise [ArgumentError] if format is not valid
1998
+ def parse_uax35_number(pattern, value, groupChar=",", decimalChar=".")
1999
+ return value if pattern.to_s.empty?
2000
+ value ||= ""
2001
+
2002
+ re = build_number_re(pattern, groupChar, decimalChar)
2003
+
2004
+ # Upcase value and remove internal spaces
2005
+ value = value.upcase.gsub(/\s+/, '')
2006
+
2007
+ # Remove groupChar from value
2008
+ value = value.gsub(groupChar, '')
2009
+
2010
+ # Replace decimalChar with "."
2011
+ value = value.gsub(decimalChar, '.')
2012
+
2013
+ if value =~ re
2014
+ # value matches: return it in normalized form (upcased, whitespace and groupChar removed, decimalChar replaced by ".")
2015
+ value
2016
+ else
2017
+ # no match
2018
+ nil
2019
+ end
2020
+ end
2021
+
2022
+ # Build a regular expression from the provided pattern to match value, after suitable modifications
2023
+ #
2024
+ # @param [String] pattern
2025
+ # @param [String] groupChar
2026
+ # @param [String] decimalChar
2027
+ # @return [Regexp] Regular expression matching value
2028
+ # @raise [ArgumentError] if format is not valid
2029
+ def build_number_re(pattern, groupChar, decimalChar)
2030
+ # pattern must be composed of only 0, #, decimalChar, groupChar, E, +, -, %, and ‰
2031
+ legal_number_pattern = /\A
2032
+ ([%‰])?
2033
+ ([+-])?
2034
+ # Mantissa
2035
+ (\#|#{groupChar == '.' ? '\.' : groupChar})*
2036
+ (0|#{groupChar == '.' ? '\.' : groupChar})*
2037
+ # Fractional
2038
+ (?:#{decimalChar == '.' ? '\.' : decimalChar}
2039
+ (0|#{groupChar == '.' ? '\.' : groupChar})*
2040
+ (\#|#{groupChar == '.' ? '\.' : groupChar})*
2041
+ # Exponent
2042
+ (E
2043
+ [+-]?
2044
+ (?:\#|#{groupChar == '.' ? '\.' : groupChar})*
2045
+ (?:0|#{groupChar == '.' ? '\.' : groupChar})*
2046
+ )?
2047
+ )?
2048
+ ([%‰])?
2049
+ \Z/x
2050
+
2051
+ unless pattern =~ legal_number_pattern
2052
+ raise ArgumentError, "unrecognized number pattern #{pattern}"
2053
+ end
2054
+
2055
+ # Remove groupChar from pattern
2056
+ pattern = pattern.gsub(groupChar, '')
2057
+
2058
+ # Replace decimalChar with "."
2059
+ pattern = pattern.gsub(decimalChar, '.')
2060
+
2061
+ # Split on decimalChar and E
2062
+ parts = pattern.split(/[\.E]/)
2063
+
2064
+ # Construct regular expression
2065
+ mantissa_str = case parts[0]
2066
+ when /\A([%‰])?([+-])?#+(0+)([%‰])?\Z/ then "#{$1}#{$2}\\d{#{$3.length},}#{$4}"
2067
+ when /\A([%‰])?([+-])?(0+)([%‰])?\Z/ then "#{$1}#{$2}\\d{#{$3.length}}#{$4}"
2068
+ when /\A([%‰])?([+-])?#+([%‰])?\Z/ then "#{$1}#{$2}\\d*#{$4}"
2069
+ end
2070
+
2071
+ fractional_str = case parts[1]
2072
+ when /\A(0+)(#+)([%‰])?\Z/ then "\\d{#{$1.length},#{$1.length+$2.length}}#{$3}"
2073
+ when /\A(0+)([%‰])?\Z/ then "\\d{#{$1.length}}#{$2}"
2074
+ when /\A(#+)([%‰])?\Z/ then "\\d{,#{$1.length}}#{$2}"
2075
+ end
2076
+ fractional_str = "\\.#{fractional_str}" if fractional_str
2077
+
2078
+ exponent_str = case parts[2]
2079
+ when /\A([+-])?(#+)(0+)([%‰])?\Z/ then "#{$1}\\d{#{$3.length},#{$2.length+$3.length}}#{$4}"
2080
+ when /\A([+-])?(0+)([%‰])?\Z/ then "#{$1}\\d{#{$2.length}}#{$3}"
2081
+ when /\A([+-])?(#+)([%‰])?\Z/ then "#{$1}\\d{,#{$2.length}}#{$3}"
2082
+ end
2083
+ exponent_str = "E#{exponent_str}" if exponent_str
2084
+
2085
+ Regexp.new("^#{mantissa_str}#{fractional_str}#{exponent_str}$")
1659
2086
  end
1660
2087
  end
1661
2088
 
@@ -1697,6 +2124,10 @@ module RDF::Tabular
1697
2124
  "errors" => self.errors
1698
2125
  }.delete_if {|k,v| Array(v).empty?}
1699
2126
  end
2127
+
2128
+ def inspect
2129
+ self.class.name + to_atd.inspect
2130
+ end
1700
2131
  end
1701
2132
 
1702
2133
  # Row values, hashed by `name`
@@ -1715,6 +2146,16 @@ module RDF::Tabular
1715
2146
  # @return [Table]
1716
2147
  attr_reader :table
1717
2148
 
2149
+ #
2150
+ # Cells providing a unique row identifier
2151
+ # @return [Array<Cell>]
2152
+ attr_reader :primaryKey
2153
+
2154
+ #
2155
+ # Title(s) of this row
2156
+ # @return [Array<RDF::Literal>]
2157
+ attr_reader :titles
2158
+
1718
2159
  #
1719
2160
  # Context from Table with base set to table URL for expanding URI Templates
1720
2161
  # @return [JSON::LD::Context]
@@ -1725,8 +2166,10 @@ module RDF::Tabular
1725
2166
  # @param [Metadata] metadata for Table
1726
2167
  # @param [Integer] number 1-based row number after skipped/header rows
1727
2168
  # @param [Integer] source_number 1-based row number from source
2169
+ # @param [Hash{Symbol => Object}] options ({})
2170
+ # @option options [Boolean] :validate check for PK/FK consistency
1728
2171
  # @return [Row]
1729
- def initialize(row, metadata, number, source_number)
2172
+ def initialize(row, metadata, number, source_number, options = {})
1730
2173
  @table = metadata
1731
2174
  @number = number
1732
2175
  @sourceNumber = source_number
@@ -1748,7 +2191,7 @@ module RDF::Tabular
1748
2191
  end
1749
2192
 
1750
2193
  # Make sure that the row length is at least as long as the number of column definitions, to implicitly include virtual columns
1751
- columns.each_with_index {|c, index| row[index] ||= (c.null || '')}
2194
+ columns.each_with_index {|c, index| row[index] ||= c.null}
1752
2195
 
1753
2196
  row.each_with_index do |value, index|
1754
2197
 
@@ -1764,7 +2207,7 @@ module RDF::Tabular
1764
2207
 
1765
2208
  @values << cell = Cell.new(metadata, column, self, value)
1766
2209
 
1767
- datatype = column.datatype || Datatype.new(base: "string", parent: column)
2210
+ datatype = column.datatype || Datatype.new({base: "string"}, parent: column)
1768
2211
  value = value.gsub(/\r\t\a/, ' ') unless %w(string json xml html anyAtomicType any).include?(datatype.base)
1769
2212
  value = value.strip.gsub(/\s+/, ' ') unless %w(string json xml html anyAtomicType any normalizedString).include?(datatype.base)
1770
2213
  # if the resulting string is an empty string, apply the remaining steps to the string given by the default property
@@ -1787,23 +2230,35 @@ module RDF::Tabular
1787
2230
  v.strip!
1788
2231
  end
1789
2232
 
1790
- expanded_dt = metadata.context.expand_iri(datatype.base, vocab: true)
2233
+ expanded_dt = datatype.id || metadata.context.expand_iri(datatype.base, vocab: true)
1791
2234
  if (lit_or_errors = value_matching_datatype(v.dup, datatype, expanded_dt, column.lang)).is_a?(RDF::Literal)
1792
2235
  lit_or_errors
1793
2236
  else
1794
2237
  cell_errors += lit_or_errors
1795
- RDF::Literal(v, language: column.lang)
2238
+ RDF::Literal(v, language: (column.lang unless column.lang == "und"))
1796
2239
  end
1797
2240
  end
1798
2241
  end.compact
1799
2242
 
2243
+ # Check for required values
2244
+ if column.required && (cell_values.any? {|v| v.to_s.empty?} || cell_values.empty?)
2245
+ cell_errors << "Required column has empty value(s): #{cell_values.map(&:to_s).inspect}"
2246
+ end
1800
2247
  cell.value = (column.separator ? cell_values : cell_values.first)
1801
2248
  cell.errors = cell_errors
1802
- metadata.send(:debug, "#{self.number}: each_cell ##{self.sourceNumber},#{cell.column.sourceNumber}", cell.errors.join("\n")) unless cell_errors.empty?
1803
2249
 
1804
2250
  map_values[columns[index - skipColumns].name] = (column.separator ? cell_values.map(&:to_s) : cell_values.first.to_s)
1805
2251
  end
1806
2252
 
2253
+ # Record primaryKey if validating
2254
+ @primaryKey = @values.
2255
+ select {|cell| Array(table.tableSchema.primaryKey).include?(cell.column.name)} if options[:validate]
2256
+
2257
+ # Record any row titles
2258
+ @titles = @values.
2259
+ select {|cell| Array(table.tableSchema.rowTitles).include?(cell.column.name)}.
2260
+ map(&:value)
2261
+
1807
2262
  # Map URLs for row
1808
2263
  @values.each_with_index do |cell, index|
1809
2264
  mapped_values = map_values.merge(
@@ -1824,30 +2279,24 @@ module RDF::Tabular
1824
2279
  {
1825
2280
  "@id" => id.to_s,
1826
2281
  "@type" => "Row",
1827
- "table" => (table.id.to_s if table.id),
2282
+ "table" => (table.id || table.url),
1828
2283
  "number" => self.number,
1829
2284
  "sourceNumber" => self.sourceNumber,
1830
- "cells" => @values.map(&:to_atd)
2285
+ "cells" => @values.map(&:value)
1831
2286
  }.delete_if {|k,v| v.nil?}
1832
2287
  end
1833
2288
 
2289
+ def inspect
2290
+ self.class.name + to_atd.inspect
2291
+ end
2292
+
1834
2293
  private
1835
2294
  #
1836
2295
  # given a datatype specification, return a literal matching that specification, if found, otherwise the list of errors
1837
2296
  # @return [RDF::Literal]
1838
2297
  def value_matching_datatype(value, datatype, expanded_dt, language)
1839
- value_errors = []
1840
-
1841
- # Check constraints
1842
- if datatype.length && value.length != datatype.length
1843
- value_errors << "#{value} does not have length #{datatype.length}"
1844
- end
1845
- if datatype.minLength && value.length < datatype.minLength
1846
- value_errors << "#{value} does not have length >= #{datatype.minLength}"
1847
- end
1848
- if datatype.maxLength && value.length > datatype.maxLength
1849
- value_errors << "#{value} does not have length <= #{datatype.maxLength}"
1850
- end
2298
+ lit, value_errors = nil, []
2299
+ original_value = value.dup
1851
2300
 
1852
2301
  format = datatype.format
1853
2302
  # Datatype specific constraints and conversions
@@ -1857,29 +2306,39 @@ module RDF::Tabular
1857
2306
  :unsignedLong, :unsignedInt, :unsignedShort, :unsignedByte,
1858
2307
  :nonPositiveInteger, :negativeInteger,
1859
2308
  :double, :float, :number
2309
+
1860
2310
  # Normalize representation based on numeric-specific facets
1861
- format ||= {}
1862
- groupChar = format[:groupChar] || ','
1863
- if format[:pattern] && !value.match(Regexp.new(format[:pattern]))
1864
- # pattern facet failed
1865
- value_errors << "#{value} does not match pattern #{format[:pattern]}"
2311
+
2312
+ format = case format
2313
+ when String then {"pattern" => format}
2314
+ when Hash then format
2315
+ else {}
1866
2316
  end
1867
- if value.include?(groupChar*2)
1868
- # pattern facet failed
1869
- value_errors << "#{value} has repeating #{groupChar.inspect}"
2317
+
2318
+ groupChar = format["groupChar"]
2319
+ decimalChar = format["decimalChar"] || '.'
2320
+ pattern = format["pattern"]
2321
+
2322
+ if !datatype.parse_uax35_number(pattern, value, groupChar || ",", decimalChar)
2323
+ value_errors << "#{value} does not match pattern #{pattern}"
1870
2324
  end
1871
- value.gsub!(groupChar, '')
1872
- value.sub!(format[:decimalChar], '.') if format[:decimalChar]
2325
+
2326
+ # pattern facet failed
2327
+ value_errors << "#{value} has repeating #{groupChar.inspect}" if groupChar && value.include?(groupChar*2)
2328
+ value = value.gsub(groupChar, '') if groupChar
2329
+ value = value.sub(decimalChar, '.')
1873
2330
 
1874
2331
  # Extract percent or per-mille sign
1875
2332
  percent = permille = false
1876
- case value
1877
- when /%$/
1878
- value = value[0..-2]
1879
- percent = true
1880
- when /‰$/
1881
- value = value[0..-2]
1882
- permille = true
2333
+ if groupChar
2334
+ case value
2335
+ when /%/
2336
+ value = value.sub('%', '')
2337
+ percent = true
2338
+ when /‰/
2339
+ value = value.sub('‰', '')
2340
+ permille = true
2341
+ end
1883
2342
  end
1884
2343
 
1885
2344
  lit = RDF::Literal(value, datatype: expanded_dt)
@@ -1889,129 +2348,117 @@ module RDF::Tabular
1889
2348
  o = o / 1000 if permille
1890
2349
  lit = RDF::Literal(o, datatype: expanded_dt)
1891
2350
  end
2351
+
2352
+ if !lit.plain? && datatype.minimum && lit < datatype.minimum
2353
+ value_errors << "#{value} < minimum #{datatype.minimum}"
2354
+ end
2355
+ case
2356
+ when datatype.minimum && lit < datatype.minimum
2357
+ value_errors << "#{value} < minimum #{datatype.minimum}"
2358
+ when datatype.maximum && lit > datatype.maximum
2359
+ value_errors << "#{value} > maximum #{datatype.maximum}"
2360
+ when datatype.minInclusive && lit < datatype.minInclusive
2361
+ value_errors << "#{value} < minInclusive #{datatype.minInclusive}"
2362
+ when datatype.maxInclusive && lit > datatype.maxInclusive
2363
+ value_errors << "#{value} > maxInclusive #{datatype.maxInclusive}"
2364
+ when datatype.minExclusive && lit <= datatype.minExclusive
2365
+ value_errors << "#{value} <= minExclusive #{datatype.minExclusive}"
2366
+ when datatype.maxExclusive && lit >= datatype.maxExclusive
2367
+ value_errors << "#{value} ?= maxExclusive #{datatype.maxExclusive}"
2368
+ end
1892
2369
  when :boolean
1893
- lit = if format
2370
+ if format
1894
2371
  # True/False determined by Y|N values
1895
2372
  t, f = format.to_s.split('|', 2)
1896
2373
  case
1897
2374
  when value == t
1898
- value = RDF::Literal::TRUE
2375
+ lit = RDF::Literal::TRUE
1899
2376
  when value == f
1900
- value = RDF::Literal::FALSE
2377
+ lit = RDF::Literal::FALSE
1901
2378
  else
1902
2379
  value_errors << "#{value} does not match boolean format #{format}"
1903
- RDF::Literal::Boolean.new(value)
1904
2380
  end
1905
2381
  else
1906
2382
  if %w(1 true).include?(value.downcase)
1907
- RDF::Literal::TRUE
2383
+ lit = RDF::Literal::TRUE
1908
2384
  elsif %w(0 false).include?(value.downcase)
1909
- RDF::Literal::FALSE
2385
+ lit = RDF::Literal::FALSE
2386
+ else
2387
+ value_errors << "#{value} does not match boolean"
1910
2388
  end
1911
2389
  end
1912
2390
  when :date, :time, :dateTime, :dateTimeStamp, :datetime
1913
- # Match values
1914
- tz, date_format, time_format = nil, nil, nil
1915
-
1916
- # Extract tz info
1917
- if format && (md = format.match(/^(.*[dyms])+(\s*[xX]{1,5})$/))
1918
- format, tz = md[1], md[2]
2391
+ if value = datatype.parse_uax35_date(format, value)
2392
+ lit = RDF::Literal(value, datatype: expanded_dt)
2393
+ else
2394
+ value_errors << "#{original_value} does not match format #{format}"
1919
2395
  end
1920
-
1921
- if format
1922
- date_format, time_format = format.split(' ')
1923
- if datatype.base.to_sym == :time
1924
- date_format, time_format = nil, date_format
1925
- end
1926
-
1927
- # Extract date, of specified
1928
- date_part = case date_format
1929
- when 'yyyy-MM-dd' then value.match(/^(?<yr>\d{4})-(?<mo>\d{2})-(?<da>\d{2})/)
1930
- when 'yyyyMMdd' then value.match(/^(?<yr>\d{4})(?<mo>\d{2})(?<da>\d{2})/)
1931
- when 'dd-MM-yyyy' then value.match(/^(?<da>\d{2})-(?<mo>\d{2})-(?<yr>\d{4})/)
1932
- when 'd-M-yyyy' then value.match(/^(?<da>\d{1,2})-(?<mo>\d{1,2})-(?<yr>\d{4})/)
1933
- when 'MM-dd-yyyy' then value.match(/^(?<mo>\d{2})-(?<da>\d{2})-(?<yr>\d{4})/)
1934
- when 'M-d-yyyy' then value.match(/^(?<mo>\d{1,2})-(?<da>\d{1,2})-(?<yr>\d{4})/)
1935
- when 'dd/MM/yyyy' then value.match(/^(?<da>\d{2})\/(?<mo>\d{2})\/(?<yr>\d{4})/)
1936
- when 'd/M/yyyy' then value.match(/^(?<da>\d{1,2})\/(?<mo>\d{1,2})\/(?<yr>\d{4})/)
1937
- when 'MM/dd/yyyy' then value.match(/^(?<mo>\d{2})\/(?<da>\d{2})\/(?<yr>\d{4})/)
1938
- when 'M/d/yyyy' then value.match(/^(?<mo>\d{1,2})\/(?<da>\d{1,2})\/(?<yr>\d{4})/)
1939
- when 'dd.MM.yyyy' then value.match(/^(?<da>\d{2})\.(?<mo>\d{2})\.(?<yr>\d{4})/)
1940
- when 'd.M.yyyy' then value.match(/^(?<da>\d{1,2})\.(?<mo>\d{1,2})\.(?<yr>\d{4})/)
1941
- when 'MM.dd.yyyy' then value.match(/^(?<mo>\d{2})\.(?<da>\d{2})\.(?<yr>\d{4})/)
1942
- when 'M.d.yyyy' then value.match(/^(?<mo>\d{1,2})\.(?<da>\d{1,2})\.(?<yr>\d{4})/)
1943
- when 'yyyy-MM-ddTHH:mm:ss' then value.match(/^(?<yr>\d{4})-(?<mo>\d{2})-(?<da>\d{2})T(?<hr>\d{2}):(?<mi>\d{2}):(?<se>\d{2})/)
1944
- when 'yyyy-MM-ddTHH:mm' then value.match(/^(?<yr>\d{4})-(?<mo>\d{2})-(?<da>\d{2})T(?<hr>\d{2}):(?<mi>\d{2})(?<se>)/)
1945
- else
1946
- value_errors << "unrecognized date/time format #{date_format}" if date_format
1947
- nil
2396
+ when :duration, :dayTimeDuration, :yearMonthDuration
2397
+ # SPEC CONFUSION: surely format also includes that for other duration types?
2398
+ re = Regexp.new(format) rescue nil
2399
+ if re.nil? ||value.match(re)
2400
+ lit = RDF::Literal(value, datatype: expanded_dt)
2401
+ else
2402
+ value_errors << "#{value} does not match format #{format}"
2403
+ end
2404
+ when :hexBinary, :base64Binary
2405
+ lit = RDF::Literal.new(value, datatype: expanded_dt)
2406
+ unless lit.valid?
2407
+ value_errors << "#{value} is invalid"
2408
+ lit = RDF::Literal.new(value)
2409
+ else
2410
+ if datatype.length && lit.object.length != datatype.length
2411
+ value_errors << "decoded #{value} does not have length #{datatype.length}"
1948
2412
  end
1949
-
1950
- # Forward past date part
1951
- if date_part
1952
- value = value[date_part.to_s.length..-1]
1953
- value = value.lstrip if date_part && value.start_with?(' ')
2413
+ if datatype.minLength && lit.object.length < datatype.minLength
2414
+ value_errors << "decoded #{value} does not have length >= #{datatype.length}"
1954
2415
  end
1955
-
1956
- # Extract time, of specified
1957
- time_part = case time_format
1958
- when 'HH:mm:ss' then value.match(/^(?<hr>\d{2}):(?<mi>\d{2}):(?<se>\d{2})/)
1959
- when 'HHmmss' then value.match(/^(?<hr>\d{2})(?<mi>\d{2})(?<se>\d{2})/)
1960
- when 'HH:mm' then value.match(/^(?<hr>\d{2}):(?<mi>\d{2})(?<se>)/)
1961
- when 'HHmm' then value.match(/^(?<hr>\d{2})(?<mi>\d{2})(?<se>)/)
1962
- else
1963
- value_errors << "unrecognized date/time format #{time_format}" if time_format
1964
- nil
2416
+ if datatype.maxLength && lit.object.length < datatype.maxLength
2417
+ value_errors << "decoded #{value} does not have length <= #{datatype.length}"
1965
2418
  end
1966
-
1967
- # Forward past time part
1968
- value = value[time_part.to_s.length..-1] if time_part
1969
-
1970
- # Use datetime match for time
1971
- time_part = date_part if date_part && date_part.names.include?("hr")
1972
-
1973
- # If there's a timezone, it may optionally start with whitespace
1974
- value = value.lstrip if tz.to_s.start_with?(' ')
1975
- tz_part = value if tz
1976
-
1977
- # Compose normalized value
1978
- vd = ("%04d-%02d-%02d" % [date_part[:yr].to_i, date_part[:mo].to_i, date_part[:da].to_i]) if date_part
1979
- vt = ("%02d:%02d:%02d" % [time_part[:hr].to_i, time_part[:mi].to_i, time_part[:se].to_i]) if time_part
1980
- value = [vd, vt].compact.join('T')
1981
- value += tz_part.to_s
1982
2419
  end
1983
-
1984
- lit = RDF::Literal(value, datatype: expanded_dt)
1985
- when :duration, :dayTimeDuration, :yearMonthDuration
1986
- # SPEC CONFUSION: surely format also includes that for other duration types?
1987
- lit = RDF::Literal(value, datatype: expanded_dt)
1988
2420
  when :anyType, :anySimpleType, :ENTITIES, :IDREFS, :NMTOKENS,
1989
2421
  :ENTITY, :ID, :IDREF, :NOTATION
1990
2422
  value_errors << "#{value} uses unsupported datatype: #{datatype.base}"
1991
2423
  else
1992
2424
  # For other types, format is a regexp
1993
- unless format.nil? || value.match(Regexp.new(format))
2425
+ re = Regexp.new(format) rescue nil
2426
+ unless re.nil? || value.match(re)
1994
2427
  value_errors << "#{value} does not match format #{format}"
1995
2428
  end
1996
2429
  lit = if value_errors.empty?
1997
2430
  if expanded_dt == RDF::XSD.string
1998
2431
  # Type string will still use language
1999
- RDF::Literal(value, language: language)
2432
+ RDF::Literal(value, language: (language unless language == "und"))
2000
2433
  else
2001
2434
  RDF::Literal(value, datatype: expanded_dt)
2002
2435
  end
2003
2436
  end
2004
2437
  end
2005
2438
 
2439
+ if datatype.length && value.to_s.length != datatype.length && ![:hexBinary, :base64Binary].include?(datatype.base.to_sym)
2440
+ value_errors << "#{value} does not have length #{datatype.length}"
2441
+ end
2442
+ if datatype.minLength && value.to_s.length < datatype.minLength && ![:hexBinary, :base64Binary].include?(datatype.base.to_sym)
2443
+ value_errors << "#{value} does not have length >= #{datatype.minLength}"
2444
+ end
2445
+ if datatype.maxLength && value.to_s.length > datatype.maxLength && ![:hexBinary, :base64Binary].include?(datatype.base.to_sym)
2446
+ value_errors << "#{value} does not have length <= #{datatype.maxLength}"
2447
+ end
2448
+
2449
+ # value constraints
2450
+ value_errors << "#{value} < minimum #{datatype.minimum}" if datatype.minimum && lit < datatype.minimum
2451
+ value_errors << "#{value} > maximum #{datatype.maximum}" if datatype.maximum && lit > datatype.maximum
2452
+ value_errors << "#{value} < minInclusive #{datatype.minInclusive}" if datatype.minInclusive && lit < datatype.minInclusive
2453
+ value_errors << "#{value} > maxInclusive #{datatype.maxInclusive}" if datatype.maxInclusive && lit > datatype.maxInclusive
2454
+ value_errors << "#{value} <= minExclusive #{datatype.minExclusive}" if datatype.minExclusive && lit <= datatype.minExclusive
2455
+ value_errors << "#{value} >= maxExclusive #{datatype.maxExclusive}" if datatype.maxExclusive && lit >= datatype.maxExclusive
2456
+
2006
2457
  # Final value is a valid literal, or a plain literal otherwise
2007
2458
  value_errors << "#{value} is not a valid #{datatype.base}" if lit && !lit.valid?
2008
2459
 
2009
- # FIXME Value constraints
2010
-
2460
+ # Either return matched literal value or errors
2011
2461
  value_errors.empty? ? lit : value_errors
2012
2462
  end
2013
2463
  end
2014
-
2015
- # Metadata errors detected
2016
- class Error < StandardError; end
2017
2464
  end