rdf-tabular 0.1.3.1 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,205 @@
1
+ -# This template is used for generating a rollup EARL report. It expects to be
2
+ -# called with a single _tests_ local with the following structure
3
+ - require 'cgi'
4
+ - require 'digest'
5
+
6
+ !!! 5
7
+ %html{:prefix => "earl: http://www.w3.org/ns/earl# doap: http://usefulinc.com/ns/doap# mf: http://www.w3.org/2001/sw/DataAccess/tests/test-manifest#"}
8
+ - subjects = tests['testSubjects']
9
+ %head
10
+ %meta{"http-equiv" => "Content-Type", :content => "text/html;charset=utf-8"}
11
+ %meta{name: "viewport", content: "width=device-width, initial-scale=1.0"}
12
+ %link{rel: "stylesheet", type: "text/css", href: "https://www.w3.org/StyleSheets/TR/base"}
13
+ %title
14
+ = tests['name']
15
+ Implementation Report
16
+ :css
17
+ span[property='dc:description'] { display: none; }
18
+ td.PASS { color: green; }
19
+ td.FAIL { color: red; }
20
+ table.report {
21
+ border-width: 1px;
22
+ border-spacing: 2px;
23
+ border-style: outset;
24
+ border-color: gray;
25
+ border-collapse: separate;
26
+ background-color: white;
27
+ }
28
+ table.report th {
29
+ border-width: 1px;
30
+ padding: 1px;
31
+ border-style: inset;
32
+ border-color: gray;
33
+ background-color: white;
34
+ -moz-border-radius: ;
35
+ }
36
+ table.report td {
37
+ border-width: 1px;
38
+ padding: 1px;
39
+ border-style: inset;
40
+ border-color: gray;
41
+ background-color: white;
42
+ -moz-border-radius: ;
43
+ }
44
+ tr.summary {font-weight: bold;}
45
+ td.passed-all {color: green;}
46
+ td.passed-most {color: darkorange;}
47
+ td.passed-some {color: red;}
48
+ td.passed-none {color: gray;}
49
+ em.rfc2119 {
50
+ text-transform: lowercase;
51
+ font-variant: small-caps;
52
+ font-style: normal;
53
+ color: #900;
54
+ }
55
+ a.testlink {
56
+ color: inherit;
57
+ text-decoration: none;
58
+ }
59
+ a.testlink:hover {
60
+ text-decoration: underline;
61
+ }
62
+ %body
63
+ - subject_refs = {}
64
+ - tests['entries'].each {|m| m['title'] ||= m['description']}
65
+ %section{:about => tests['@id'], typeof: Array(tests['@type']).join(" ")}
66
+ %h2
67
+ Ruby rdf-tabular gem test results
68
+ %p
69
+ This document reports conformance for the following specifications:
70
+ %ul
71
+ %li
72
+ %a{property: "doap:name", href: "http://www.w3.org/TR/tabular-data-model/"}="MetaModel for Tabular Data and Metadata on the Web"
73
+ %li
74
+ %a{property: "doap:name", href: "http://www.w3.org/TR/tabular-metadata/"}="Metadata Vocabulary for Tabular Data"
75
+ %li
76
+ %a{property: "doap:name", href: "http://www.w3.org/TR/csv2rdf/"}="Generating RDF from Tabular Data on the Web"
77
+ %li
78
+ %a{property: "doap:name", href: "http://www.w3.org/TR/csv2json/"}="Generating JSON from Tabular Data on the Web"
79
+ %p
80
+ This report is also available in
81
+ %a{:href => "earl.ttl"}
82
+ Turtle
83
+ %dl
84
+ - subjects.each_with_index do |subject, index|
85
+ - subject_refs[subject['@id']] = "subj_#{index}"
86
+ %dt{:id => subject_refs[subject['@id']]}
87
+ %a{:href => subject['@id']}
88
+ %span{:about => subject['@id'], property: "doap:name"}<= subject['name']
89
+ %dd{property: "earl:testSubjects", resource: subject['@id'], typeof: Array(subject['@type']).join(" "), :inlist => true}
90
+ %dl
91
+ - if subject['doapDesc']
92
+ %dt= "Description"
93
+ %dd{property: "doap:description", :lang => 'en'}<
94
+ ~ CGI.escapeHTML subject['doapDesc']
95
+ - if subject['language']
96
+ %dt= "Programming Language"
97
+ %dd{property: "doap:programming-language"}<
98
+ ~ CGI.escapeHTML subject['language']
99
+ - if subject['homepage']
100
+ %dt= "Home Page"
101
+ %dd{property: "doap:homepage"}
102
+ %a{href: subject['homepage']}
103
+ ~ CGI.escapeHTML subject['homepage']
104
+ - if subject['developer']
105
+ %dt= "Developer"
106
+ %dd{:rel => "doap:developer"}
107
+ - subject['developer'].each do |dev|
108
+ %div{resource: dev['@id'], typeof: Array(dev['@type']).join(" ")}
109
+ - if dev.has_key?('@id')
110
+ %a{:href => dev['@id']}
111
+ %span{property: "foaf:name"}<
112
+ ~ CGI.escapeHTML dev['foaf:name']
113
+ - else
114
+ %span{property: "foaf:name"}<
115
+ ~ CGI.escapeHTML dev['foaf:name']
116
+ - if dev['foaf:homepage']
117
+ %a{property: "foaf:homepage", href: dev['foaf:homepage']}
118
+ ~ CGI.escapeHTML dev['foaf:homepage']
119
+ %dt
120
+ Test Suite Compliance
121
+ %dd
122
+ %table.report
123
+ %tbody
124
+ - tests['entries'].sort_by {|m| m['title'].to_s.downcase}.each do |manifest|
125
+ - passed = manifest['entries'].select {|t| t['assertions'][index]['result']['outcome'] == 'earl:passed' }.length
126
+ - total = manifest['entries'].length
127
+ - pct = (passed * 100.0) / total
128
+ - cls = (pct == 100.0 ? 'passed-all' : (pct >= 85.0) ? 'passed-most' : (pct == 0.0 ? 'passed-none' : 'passed-some'))
129
+ %tr
130
+ %td
131
+ %a{href: "##{manifest['title']}"}
132
+ ~ manifest['title']
133
+ %td{:class => cls}
134
+ = pct == 0.0 ? "Untested" : "#{passed}/#{total} (#{'%.1f' % pct}%)"
135
+ %section
136
+ %h2
137
+ Individual Test Results
138
+ - tests['entries'].sort_by {|m| m['title'].to_s.downcase}.each do |manifest|
139
+ - test_cases = manifest['entries']
140
+ %section{id: manifest['title'], typeof: manifest['@type'].join(" "), resource: manifest['@id']}
141
+ %h2{property: "dc:title mf:name"}<=manifest['title']
142
+ - Array(manifest['description']).each do |desc|
143
+ %p{property: "rdfs:comment"}<
144
+ ~ CGI.escapeHTML desc
145
+ %table.report
146
+ - skip_subject = {}
147
+ - passed_tests = []
148
+ %tr
149
+ %th
150
+ Test
151
+ - subjects.each_with_index do |subject, index|
152
+ - subject_refs[subject['@id']] = "subj_#{index}"
153
+ -# If subject is untested for every test in this manifest, skip it
154
+ - skip_subject[subject['@id']] = manifest['entries'].all? {|t| t['assertions'][index]['result']['outcome'] == 'earl:untested'}
155
+ - unless skip_subject[subject['@id']]
156
+ %th
157
+ %a{:href => '#' + subject_refs[subject['@id']]}<=subject['name']
158
+ - test_cases.each do |test|
159
+ %tr{:rel => "mf:entries", typeof: test['@type'].join(" "), resource: test['@id'], :inlist => true}
160
+ %td
161
+ = "Test #{test['@id'].split("#").last}: #{CGI.escapeHTML test['title']}"
162
+ - test['assertions'].each_with_index do |assertion, ndx|
163
+ - next if skip_subject[assertion['subject']]
164
+ - pass_fail = assertion['result']['outcome'].split(':').last.upcase.sub(/(PASS|FAIL)ED$/, '\1')
165
+ - passed_tests[ndx] = (passed_tests[ndx] || 0) + (pass_fail == 'PASS' ? 1 : 0)
166
+ %td{:class => pass_fail, property: "earl:assertions", typeof: assertion['@type'], :inlist => true}
167
+ - if assertion['assertedBy']
168
+ %link{property: "earl:assertedBy", :href => assertion['assertedBy']}
169
+ %link{property: "earl:test", :href => assertion['test']}
170
+ %link{property: "earl:subject", :href => assertion['subject']}
171
+ - if assertion['mode']
172
+ %link{property: 'earl:mode', :href => assertion['mode']}
173
+ %span{property: "earl:result", typeof: assertion['result']['@type']}
174
+ %span{property: 'earl:outcome', resource: assertion['result']['outcome']}
175
+ = pass_fail
176
+ %tr.summary
177
+ %td
178
+ = "Percentage passed out of #{manifest['entries'].length} Tests"
179
+ - passed_tests.compact.each do |r|
180
+ - pct = (r * 100.0) / manifest['entries'].length
181
+ %td{:class => (pct == 100.0 ? 'passed-all' : (pct >= 95.0 ? 'passed-most' : 'passed-some'))}
182
+ = "#{'%.1f' % pct}%"
183
+ %section#appendix{property: "earl:generatedBy", resource: tests['generatedBy']['@id'], typeof: tests['generatedBy']['@type']}
184
+ %h2
185
+ Report Generation Software
186
+ - doap = tests['generatedBy']
187
+ - rel = doap['release']
188
+ %p
189
+ This report generated by
190
+ %span{property: "doap:name"}<
191
+ %a{:href => tests['generatedBy']['@id']}<
192
+ = doap['name']
193
+ %meta{property: "doap:shortdesc", :content => doap['shortdesc'], :lang => 'en'}
194
+ %meta{property: "doap:description", :content => doap['doapDesc'], :lang => 'en'}
195
+ version
196
+ %span{property: "doap:release", resource: rel['@id'], typeof: 'doap:Version'}
197
+ %span{property: "doap:revision"}<=rel['revision']
198
+ %meta{property: "doap:name", :content => rel['name']}
199
+ %meta{property: "doap:created", :content => rel['created'], :datatype => "xsd:date"}
200
+ an
201
+ %a{property: "doap:license", :href => doap['license']}<="Unlicensed"
202
+ %span{property: "doap:programming-language"}<="Ruby"
203
+ application. More information is available at
204
+ %a{property: "doap:homepage", :href => doap['homepage']}<=doap['homepage']
205
+ = "."
@@ -0,0 +1,4 @@
1
+ {+url}-metadata.json
2
+ csv-metadata.json
3
+ {+url}.json
4
+ csvm.json
@@ -1,9 +1,5 @@
1
1
  $:.unshift(File.expand_path("..", __FILE__))
2
2
  require 'rdf' # @see http://rubygems.org/gems/rdf
3
- begin
4
- require 'byebug' # REMOVE ME
5
- rescue LoadError
6
- end
7
3
  require 'csv'
8
4
 
9
5
  module RDF
@@ -28,6 +24,16 @@ module RDF
28
24
  autoload :Transformation, 'rdf/tabular/metadata'
29
25
  autoload :VERSION, 'rdf/tabular/version'
30
26
 
27
+ # Metadata errors detected
28
+ class Error < RDF::ReaderError; end
29
+
30
+ # Relative location of site-wide configuration file
31
+ SITE_WIDE_CONFIG = "/.well-known/csvm".freeze
32
+ SITE_WIDE_DEFAULT = %(
33
+ {+url}-metadata.json
34
+ csv-metadata.json
35
+ ).gsub(/^\s+/, '').freeze
36
+
31
37
  def self.debug?; @debug; end
32
38
  def self.debug=(value); @debug = value; end
33
39
  end
@@ -8,12 +8,13 @@ module RDF::Tabular
8
8
  # RDF::Format.for(:tsv) #=> RDF::Tabular::Format
9
9
  # RDF::Format.for("etc/foaf.csv")
10
10
  # RDF::Format.for("etc/foaf.tsv")
11
- # RDF::Format.for(:file_name => "etc/foaf.csv")
12
- # RDF::Format.for(:file_name => "etc/foaf.tsv")
13
- # RDF::Format.for(:file_extension => "csv")
14
- # RDF::Format.for(:file_extension => "tsv")
15
- # RDF::Format.for(:content_type => "text/csv")
16
- # RDF::Format.for(:content_type => "text/tab-separated-values")
11
+ # RDF::Format.for(file_name: "etc/foaf.csv")
12
+ # RDF::Format.for(file_name: "etc/foaf.tsv")
13
+ # RDF::Format.for(file_extension: "csv")
14
+ # RDF::Format.for(file_extension: "tsv")
15
+ # RDF::Format.for(content_type: "text/csv")
16
+ # RDF::Format.for(content_type: "text/tab-separated-values")
17
+ # RDF::Format.for(content_type: "application/csvm+json")
17
18
  #
18
19
  # @example Obtaining serialization format MIME types
19
20
  # RDF::Format.content_types #=> {"text/csv" => [RDF::Tabular::Format]}
@@ -25,7 +26,10 @@ module RDF::Tabular
25
26
  class Format < RDF::Format
26
27
  content_type 'text/csv',
27
28
  extensions: [:csv, :tsv],
28
- alias: 'text/tab-separated-values'
29
+ alias: %w{
30
+ text/tab-separated-values
31
+ application/csvm+json
32
+ }
29
33
  content_encoding 'utf-8'
30
34
 
31
35
  reader { RDF::Tabular::Reader }
@@ -11,8 +11,7 @@ require 'yaml' # used by BCP47, which should have required it.
11
11
  # CSVM Metadata processor
12
12
  #
13
13
  # * Extracts Metadata from file or Hash definition
14
- # * Merges multiple Metadata definitions
15
- # * Extract Metadata from a CSV file
14
+ # * Extract Embedded Metadata from a CSV file
16
15
  # * Return table-level annotations
17
16
  # * Return Column-level annotations
18
17
  # * Return row iterator with column information
@@ -45,30 +44,28 @@ module RDF::Tabular
45
44
  valueUrl: :uri_template,
46
45
  }.freeze
47
46
  INHERITED_DEFAULTS = {
48
- aboutUrl: "".freeze,
49
47
  default: "".freeze,
50
48
  lang: "und",
51
49
  null: "".freeze,
52
50
  ordered: false,
53
- propertyUrl: "".freeze,
54
51
  required: false,
55
52
  textDirection: "ltr".freeze,
56
- valueUrl: "".freeze,
57
53
  }.freeze
58
54
 
59
55
  # Valid datatypes
60
56
  DATATYPES = {
61
- anyAtomicType: RDF::XSD.anySimpleType,
57
+ anyAtomicType: RDF::XSD.anyAtomicType,
62
58
  anyURI: RDF::XSD.anyURI,
63
59
  base64Binary: RDF::XSD.basee65Binary,
64
60
  boolean: RDF::XSD.boolean,
65
61
  byte: RDF::XSD.byte,
66
62
  date: RDF::XSD.date,
67
63
  dateTime: RDF::XSD.dateTime,
68
- dateTimeDuration: RDF::XSD.dateTimeDuration,
64
+ dayTimeDuration: RDF::XSD.dayTimeDuration,
69
65
  dateTimeStamp: RDF::XSD.dateTimeStamp,
70
66
  decimal: RDF::XSD.decimal,
71
67
  double: RDF::XSD.double,
68
+ duration: RDF::XSD.duration,
72
69
  float: RDF::XSD.float,
73
70
  ENTITY: RDF::XSD.ENTITY,
74
71
  gDay: RDF::XSD.gDay,
@@ -84,6 +81,7 @@ module RDF::Tabular
84
81
  Name: RDF::XSD.Name,
85
82
  NCName: RDF::XSD.NCName,
86
83
  negativeInteger: RDF::XSD.negativeInteger,
84
+ NMTOKEN: RDF::XSD.NMTOKEN,
87
85
  nonNegativeInteger: RDF::XSD.nonNegativeInteger,
88
86
  nonPositiveInteger: RDF::XSD.nonPositiveInteger,
89
87
  normalizedString: RDF::XSD.normalizedString,
@@ -100,7 +98,7 @@ module RDF::Tabular
100
98
  unsignedShort: RDF::XSD.unsignedShort,
101
99
  yearMonthDuration: RDF::XSD.yearMonthDuration,
102
100
 
103
- any: RDF::XSD.anySimpleType,
101
+ any: RDF::XSD.anyAtomicType,
104
102
  binary: RDF::XSD.base64Binary,
105
103
  datetime: RDF::XSD.dateTime,
106
104
  html: RDF.HTML,
@@ -115,7 +113,7 @@ module RDF::Tabular
115
113
 
116
114
  # Local version of the context
117
115
  # @return [JSON::LD::Context]
118
- LOCAL_CONTEXT = ::JSON::LD::Context.new.parse(File.expand_path("../../../../etc/csvw.jsonld", __FILE__))
116
+ LOCAL_CONTEXT = ::JSON::LD::Context.new.parse(File.expand_path("../../../../etc/csvw.jsonld", __FILE__)).freeze
119
117
 
120
118
  # ID of this Metadata
121
119
  # @return [RDF::URI]
@@ -139,7 +137,9 @@ module RDF::Tabular
139
137
  #
140
138
  # @param [String] path
141
139
  # @param [Hash{Symbol => Object}] options
142
- # see `RDF::Util::File.open_file` in RDF.rb
140
+ # see `RDF::Util::File.open_file` in RDF.rb and {#new}
141
+ # @yield [Metadata]
142
+ # @raise [IOError] if file not found
143
143
  def self.open(path, options = {})
144
144
  options = options.merge(
145
145
  headers: {
@@ -152,8 +152,25 @@ module RDF::Tabular
152
152
  end
153
153
  end
154
154
 
155
+ # Return the well-known site-wide configuration for a file, and remember it using a weak-reference cache to avoid unnecessary retrievals.
156
+ # @param [String] base the URL of the file; the configuration location is found by joining SITE_WIDE_CONFIG onto it
157
+ # @return [Array<String>, false] metadata location templates, one per entry
158
+ def self.site_wide_config(base)
159
+ require 'rdf/util/cache' unless defined?(::RDF::Util::Cache)
160
+ @cache ||= RDF::Util::Cache.new(-1)
161
+
162
+ config_loc = RDF::URI(base).join(SITE_WIDE_CONFIG).to_s
163
+ # Only load if nothing is cached for this location yet. Use `SITE_WIDE_DEFAULT` if the file cannot be read
164
+ if @cache[config_loc].nil?
165
+ @cache[config_loc] = RDF::Util::File.open_file(config_loc) do |rd|
166
+ rd.each_line.to_a # NOTE(review): presumably each line keeps its line terminator, unlike the whitespace-split default below -- confirm callers tolerate both
167
+ end rescue SITE_WIDE_DEFAULT.split # rescue modifier: falls back on any StandardError; NOTE(review): confirm the fallback is also stored in @cache on all supported Rubies
168
+ end
169
+ @cache[config_loc]
170
+ end
171
+
155
172
  ##
156
- # Return metadata for a file, based on user-specified and path-relative locations from an input file
173
+ # Return metadata for a file, based on user-specified, linked, and site-wide location configuration from an input file
157
174
  # @param [IO, StringIO] input
158
175
  # @param [Hash{Symbol => Object}] options
159
176
  # @option options [Metadata, Hash, String, RDF::URI] :metadata user supplied metadata, merged on top of extracted metadata. If provided as a URL, Metadata is loaded from that location
@@ -175,22 +192,46 @@ module RDF::Tabular
175
192
  # Search for metadata until found
176
193
 
177
194
  # load link metadata, if available
178
- locs = []
179
- if input.respond_to?(:links) &&
195
+ all_locs = []
196
+ if !metadata && input.respond_to?(:links) &&
180
197
  link = input.links.find_link(%w(rel describedby))
181
- locs << RDF::URI(base).join(link.href)
198
+ link_loc = RDF::URI(base).join(link.href).to_s
199
+ md = Metadata.open(link_loc, options.merge(filenames: link_loc, reason: "load linked metadata: #{link_loc}"))
200
+ all_locs << link_loc if md
201
+ # Metadata must describe file to be useful
202
+ metadata = md if md && md.describes_file?(base)
182
203
  end
183
204
 
184
- if base
185
- locs += [RDF::URI("#{base}-metadata.json"), RDF::URI(base).join("metadata.json")]
205
+ locs = []
206
+ # If we still don't have metadata, load the site-wide configuration file and use templates found there as locations
207
+ if !metadata && base
208
+ templates = site_wide_config(base)
209
+ debug("for_input", options) {"templates: #{templates.map(&:to_s).inspect}"}
210
+ locs = templates.map do |template|
211
+ t = Addressable::Template.new(template)
212
+ RDF::URI(base).join(t.expand(url: base).to_s)
213
+ end
214
+ debug("for_input", options) {"locs: #{locs.map(&:to_s).inspect}"}
215
+
216
+ locs.each do |loc|
217
+ metadata ||= begin
218
+ md = Metadata.open(loc, options.merge(filenames: loc, reason: "load found metadata: #{loc}"))
219
+ # Metadata must describe file to be useful
220
+ all_locs << loc if md
221
+ md if md && md.describes_file?(base)
222
+ rescue IOError
223
+ debug("for_input", options) {"failed to load found metadata #{loc}: #{$!}"}
224
+ nil
225
+ end
226
+ end
186
227
  end
187
228
 
188
- locs.each do |loc|
189
- metadata ||= begin
190
- Metadata.open(loc, options.merge(filenames: loc, reason: "load found metadata: #{loc}"))
191
- rescue
192
- debug("for_input", options) {"failed to load found metadata #{loc}: #{$!}"}
193
- nil
229
+ # If Metadata was found, but no metadata describes the file, issue a warning
230
+ if !all_locs.empty? && !metadata
231
+ warnings = options.fetch(:warnings, [])
232
+ warnings << "Found metadata at #{all_locs.join(",")}, which does not describe #{base}, ignoring"
233
+ if options[:validate] && !options[:warnings]
234
+ $stderr.puts "Warnings: #{warnings.join("\n")}"
194
235
  end
195
236
  end
196
237
 
@@ -198,7 +239,7 @@ module RDF::Tabular
198
239
  metadata = case
199
240
  when metadata then metadata
200
241
  when base then TableGroup.new({"@context" => "http://www.w3.org/ns/csvw", tables: [{url: base}]}, options)
201
- else TableGroup.new({"@context" => "http://www.w3.org/ns/csvw", tables: []}, options)
242
+ else TableGroup.new({"@context" => "http://www.w3.org/ns/csvw", tables: [{url: nil}]}, options)
202
243
  end
203
244
 
204
245
  # Make TableGroup, if not already
@@ -217,6 +258,8 @@ module RDF::Tabular
217
258
  else ::JSON.parse(input.to_s)
218
259
  end
219
260
 
261
+ raise ::JSON::ParserError unless object.is_a?(Hash)
262
+
220
263
  unless options[:parent]
221
264
  # Add context, if not set (which it should be)
222
265
  object['@context'] ||= options.delete(:@context) || options[:context]
@@ -237,7 +280,7 @@ module RDF::Tabular
237
280
  when %w(tables).any? {|k| object_keys.include?(k)} then :TableGroup
238
281
  when %w(dialect tableSchema transformations).any? {|k| object_keys.include?(k)} then :Table
239
282
  when %w(targetFormat scriptFormat source).any? {|k| object_keys.include?(k)} then :Transformation
240
- when %w(columns primaryKey foreignKeys).any? {|k| object_keys.include?(k)} then :Schema
283
+ when %w(columns primaryKey foreignKeys rowTitles).any? {|k| object_keys.include?(k)} then :Schema
241
284
  when %w(name virtual).any? {|k| object_keys.include?(k)} then :Column
242
285
  when %w(commentPrefix delimiter doubleQuote encoding header headerRowCount).any? {|k| object_keys.include?(k)} then :Dialect
243
286
  when %w(lineTerminators quoteChar skipBlankRows skipColumns skipInitialSpace skipRows trim).any? {|k| object_keys.include?(k)} then :Dialect
@@ -251,13 +294,15 @@ module RDF::Tabular
251
294
  when :Column then RDF::Tabular::Column
252
295
  when :Dialect then RDF::Tabular::Dialect
253
296
  else
254
- raise Error, "Unkown metadata type: #{type.inspect}"
297
+ raise Error, "Unknown metadata type: #{type.inspect}"
255
298
  end
256
299
  end
257
300
 
258
301
  md = klass.allocate
259
302
  md.send(:initialize, object, options)
260
303
  md
304
+ rescue ::JSON::ParserError
305
+ raise Error, "Expected input to be a JSON Object"
261
306
  end
262
307
 
263
308
  ##
@@ -271,6 +316,8 @@ module RDF::Tabular
271
316
  # Context used for this metadata. Taken from input if not provided
272
317
  # @option options [RDF::URI] :base
273
318
  # The Base URL to use when expanding the document. This overrides the value of `input` if it is a URL. If not specified and `input` is not an URL, the base URL defaults to the current document URL if in a browser context, or the empty string if there is no document context.
319
+ # @option options [Boolean] :normalize normalize the object
320
+ # @option options [Boolean] :validate Strict metadata validation
274
321
  # @raise [Error]
275
322
  # @return [Metadata]
276
323
  def initialize(input, options = {})
@@ -285,15 +332,15 @@ module RDF::Tabular
285
332
  @context = case input['@context']
286
333
  when Array
287
334
  warn "Context missing required value 'http://www.w3.org/ns/csvw'" unless input['@context'].include?('http://www.w3.org/ns/csvw')
288
- LOCAL_CONTEXT.parse(input['@context'].detect {|e| e.is_a?(Hash)} || {})
335
+ LOCAL_CONTEXT.dup.parse(input['@context'].detect {|e| e.is_a?(Hash)} || {})
289
336
  when Hash
290
337
  warn "Context missing required value 'http://www.w3.org/ns/csvw'" unless input['@context'].include?('http://www.w3.org/ns/csvw')
291
- LOCAL_CONTEXT.parse(input['@context'])
292
- when "http://www.w3.org/ns/csvw" then LOCAL_CONTEXT
338
+ LOCAL_CONTEXT.dup.parse(input['@context'])
339
+ when "http://www.w3.org/ns/csvw" then LOCAL_CONTEXT.dup
293
340
  else
294
341
  if self.is_a?(TableGroup) || self.is_a?(Table) && !@parent
295
342
  warn "Context missing required value 'http://www.w3.org/ns/csvw'"
296
- LOCAL_CONTEXT
343
+ LOCAL_CONTEXT.dup
297
344
  end
298
345
  end
299
346
 
@@ -326,17 +373,17 @@ module RDF::Tabular
326
373
  when :url
327
374
  # URL of CSV relative to metadata
328
375
  object[:url] = value
329
- @url = base.join(value)
330
- @context.base = @url if @context # Use as base for expanding IRIs
376
+ @url = @options[:base].join(value)
377
+ @options[:base] = @url if @context # Use as base for expanding IRIs
331
378
  when :@id
332
379
  # metadata identifier
333
380
  object[:@id] = if value.is_a?(String)
334
381
  value
335
382
  else
336
383
  warn "#{type} has invalid property '@id' (#{value.inspect}): expected a string"
337
- ""
384
+ "" # Default value
338
385
  end
339
- @id = base.join(object[:@id])
386
+ @id = @options[:base].join(object[:@id])
340
387
  else
341
388
  if @properties.has_key?(key) || INHERITED_PROPERTIES.has_key?(key)
342
389
  self.send("#{key}=".to_sym, value)
@@ -348,7 +395,15 @@ module RDF::Tabular
348
395
  end
349
396
 
350
397
  # Set type from @type, if present and not otherwise defined
351
- @type ||= object[:@type].to_sym if object[:@type]
398
+ @type = object[:@type].to_sym if object[:@type]
399
+
400
+ if options[:normalize]
401
+ # If normalizing, also remove remaining @context
402
+ self.normalize!
403
+ @context = nil
404
+ object.delete(:@context)
405
+ end
406
+
352
407
  if reason
353
408
  debug("md#initialize") {reason}
354
409
  debug("md#initialize") {"filenames: #{filenames}"}
@@ -356,8 +411,14 @@ module RDF::Tabular
356
411
  end
357
412
  end
358
413
 
359
- # Setters
414
+ # Getters and Setters
360
415
  INHERITED_PROPERTIES.keys.each do |key|
416
+ define_method(key) do
417
+ object.fetch(key) do
418
+ parent ? parent.send(key) : default_value(key)
419
+ end
420
+ end
421
+
361
422
  define_method("#{key}=".to_sym) do |value|
362
423
  invalid = case key
363
424
  when :aboutUrl, :default, :propertyUrl, :valueUrl
@@ -370,7 +431,7 @@ module RDF::Tabular
370
431
  when :ordered, :required
371
432
  "boolean" unless value.is_a?(TrueClass) || value.is_a?(FalseClass)
372
433
  when :separator
373
- "single character" unless value.nil? || value.is_a?(String) && value.length == 1
434
+ "string or null" unless value.nil? || value.is_a?(String)
374
435
  when :textDirection
375
436
  "rtl or ltr" unless %(rtl ltr).include?(value)
376
437
  when :datatype
@@ -379,7 +440,7 @@ module RDF::Tabular
379
440
 
380
441
  if invalid
381
442
  warn "#{type} has invalid property '#{key}' (#{value.inspect}): expected #{invalid}"
382
- object[key] = default_value(key) unless default_value(key).nil?
443
+ object.delete(key)
383
444
  else
384
445
  object[key] = value
385
446
  end
@@ -399,18 +460,19 @@ module RDF::Tabular
399
460
  # An object property that provides a schema description as described in section 3.8 Schemas, for all the tables in the group. This may be provided as an embedded object within the JSON metadata or as a URL reference to a separate JSON schema document
400
461
  # when loading a remote schema, assign @id from its location if not already set
401
462
  def tableSchema=(value)
402
- case value
463
+ object[:tableSchema] = case value
403
464
  when String
404
- link = base.join(value).to_s
405
- s = Schema.open(link, @options.merge(parent: self, context: nil))
406
- s[:@id] ||= link
407
- object[:tableSchema] = s
465
+ link = context.base.join(value).to_s
466
+ md = Schema.open(link, @options.merge(parent: self, context: nil, normalize: true))
467
+ md[:@id] ||= link
468
+ md
408
469
  when Hash
409
- object[:tableSchema] = Metadata.new(value, @options.merge(parent: self, context: nil))
470
+ Schema.new(value, @options.merge(parent: self, context: nil))
410
471
  when Schema
411
- object[:tableSchema] = value
472
+ value
412
473
  else
413
474
  warn "#{type} has invalid property 'tableSchema' (#{value.inspect}): expected a URL or object"
475
+ Schema.new({}, @options.merge(parent: self, context: nil))
414
476
  end
415
477
  end
416
478
 
@@ -445,13 +507,16 @@ module RDF::Tabular
445
507
  end
446
508
 
447
509
  # If provided, dialect provides hints to processors about how to parse the referenced file to create a tabular data model.
448
- @dialect = case value
510
+ @dialect = object[:dialect] = case value
449
511
  when String
450
- object[:dialect] = Metadata.open(base.join(value), @options.merge(parent: self, context: nil))
512
+ link = context.base.join(value).to_s
513
+ md = Metadata.open(link, @options.merge(parent: self, context: nil, normalize: true))
514
+ md[:@id] ||= link
515
+ md
451
516
  when Hash
452
- object[:dialect] = Metadata.new(value, @options.merge(parent: self, context: nil))
517
+ Dialect.new(value, @options.merge(parent: self, context: nil))
453
518
  when Dialect
454
- object[:dialect] = value
519
+ value
455
520
  else
456
521
  warn "#{type} has invalid property 'dialect' (#{value.inspect}): expected a URL or object"
457
522
  nil
@@ -460,16 +525,18 @@ module RDF::Tabular
460
525
 
461
526
  # Set new datatype
462
527
  # @return [Dialect]
528
+ # @raise [Error] if datatype is not valid
463
529
  def datatype=(value)
464
530
  val = case value
465
531
  when Hash then Datatype.new(value, parent: self)
466
532
  else Datatype.new({base: value}, parent: self)
467
533
  end
468
534
 
469
- if val.valid?
535
+ if val.valid? || value.is_a?(Hash)
536
+ # Set it if it was specified as an object, which may cause validation errors later
470
537
  object[:datatype] = val
471
538
  else
472
- warn "#{type} has invalid property 'datatype': expected a Datatype"
539
+ warn "#{type} has invalid property 'datatype': expected a built-in or an object"
473
540
  end
474
541
  end
475
542
 
@@ -538,7 +605,7 @@ module RDF::Tabular
538
605
  value = object[key]
539
606
  case key
540
607
  when :base
541
- warn "#{type} has invalid base '#{key}': #{value.inspect}" unless DATATYPES.keys.map(&:to_s).include?(value) || RDF::URI(value).absolute?
608
+ errors << "#{type} has invalid base: #{value.inspect}" unless DATATYPES.keys.map(&:to_s).include?(value)
542
609
  when :columns
543
610
  value.each do |v|
544
611
  begin
@@ -549,14 +616,20 @@ module RDF::Tabular
549
616
  end
550
617
  column_names = value.map(&:name)
551
618
  errors << "#{type} has invalid property '#{key}': must have unique names: #{column_names.inspect}" unless column_names.uniq == column_names
552
- when :dialect, :tables, :tableSchema, :transformations
619
+ when :datatype, :dialect, :tables, :tableSchema, :transformations
553
620
  Array(value).each do |t|
554
- begin
555
- t.validate!
556
- rescue Error => e
557
- errors << e.message
621
+ # Make sure value is of appropriate class
622
+ if t.is_a?({datatype: Datatype, dialect: Dialect, tables: Table, tableSchema: Schema, transformations: Transformation}[key])
623
+ begin
624
+ t.validate!
625
+ rescue Error => e
626
+ errors << e.message
627
+ end
628
+ else
629
+ errors << "#{type} has invalid property '#{key}': unexpected value #{value.class.name}"
558
630
  end
559
631
  end
632
+ errors << "#{type} has invalid property 'tables': must not be empty" if key == :tables && Array(value).empty?
560
633
  when :foreignKeys
561
634
  # An array of foreign key definitions that define how the values from specified columns within this table link to rows within this table or other tables. A foreign key definition is a JSON object with the properties:
562
635
  value.each do |fk|
@@ -577,13 +650,13 @@ module RDF::Tabular
577
650
  errors << "#{type} has invalid property '#{key}': reference has a schemaReference: #{reference.inspect}"
578
651
  end
579
652
  # resource is the URL of a Table in the TableGroup
580
- ref = base.join(reference['resource']).to_s
653
+ ref = context.base.join(reference['resource']).to_s
581
654
  table = root.is_a?(TableGroup) && root.tables.detect {|t| t.url == ref}
582
655
  errors << "#{type} has invalid property '#{key}': table referenced by #{ref} not found" unless table
583
656
  table.tableSchema if table
584
657
  elsif reference.has_key?('schemaReference')
585
658
  # schemaReference is the @id of a Schema in the TableGroup
586
- ref = base.join(reference['schemaReference']).to_s
659
+ ref = context.base.join(reference['schemaReference']).to_s
587
660
  tables = root.is_a?(TableGroup) ? root.tables.select {|t| t.tableSchema[:@id] == ref} : []
588
661
  case tables.length
589
662
  when 0
@@ -608,6 +681,114 @@ module RDF::Tabular
608
681
  errors << "#{type} has invalid property '#{key}': reference must be an object #{reference.inspect}"
609
682
  end
610
683
  end
684
+ when :format
685
+ case value
686
+ when Hash
687
+ # Object form only appropriate for numeric type
688
+ unless %w(
689
+ decimal integer long int short byte double float number
690
+ nonNegativeInteger positiveInteger nonPositiveInteger negativeInteger
691
+ unsignedLong unsignedInt unsignedShort unsignedByte
692
+ ).include?(self.base)
693
+ warn "#{type} has invalid property '#{key}': Object form only allowed on string or binary datatypes"
694
+ object.delete(:format) # act as if not set
695
+ end
696
+
697
+ # Otherwise, if it exists, its a UAX35 number pattern
698
+ begin
699
+ parse_uax35_number(value["pattern"], nil, value.fetch('groupChar', ','), value.fetch('decimalChar', '.'))
700
+ rescue ArgumentError => e
701
+ warn "#{type} has invalid property '#{key}' pattern: #{e.message}"
702
+ object[:format].delete("pattern") # act as if not set
703
+ end
704
+ else
705
+ case self.base
706
+ when 'boolean'
707
+ unless value.split("|").length == 2
708
+ warn "#{type} has invalid property '#{key}': annotation provides the true and false values expected, separated by '|'"
709
+ object.delete(:format) # act as if not set
710
+ end
711
+ when :decimal, :integer, :long, :int, :short, :byte,
712
+ :nonNegativeInteger, :positiveInteger,
713
+ :unsignedLong, :unsignedInt, :unsignedShort, :unsignedByte,
714
+ :nonPositiveInteger, :negativeInteger,
715
+ :double, :float, :number
716
+ begin
717
+ parse_uax35_number(value, nil)
718
+ rescue ArgumentError => e
719
+ warn "#{type} has invalid property '#{key}': #{e.message}"
720
+ object.delete(:format) # act as if not set
721
+ end
722
+ when 'date', 'dateTime', 'datetime', 'dateTimeStamp', 'time'
723
+ # Parse and validate format
724
+ begin
725
+ parse_uax35_date(value, nil)
726
+ rescue ArgumentError => e
727
+ warn "#{type} has invalid property '#{key}': #{e.message}"
728
+ object.delete(:format) # act as if not set
729
+ end
730
+ else
731
+ # Otherwise, if it exists, its a regular expression
732
+ begin
733
+ Regexp.compile(value)
734
+ rescue
735
+ warn "#{type} has invalid property '#{key}': #{$!.message}"
736
+ object.delete(:format) # act as if not set
737
+ end
738
+ end
739
+ end
740
+ when :length, :minLength, :maxLength
741
+ # Applications must raise an error if both length and minLength are specified and length is less than minLength.
742
+ # Similarly, applications must raise an error if both length and maxLength are specified and length is greater than maxLength.
743
+ if object[:length]
744
+ case key
745
+ when :minLength
746
+ errors << "#{type} has invalid property minLength': both length and minLength requires length be greater than or equal to minLength" if object[:length] < value
747
+ when :maxLength
748
+ errors << "#{type} has invalid property maxLength': both length and maxLength requires length be less than or equal to maxLength" if object[:length] > value
749
+ end
750
+ end
751
+
752
+ # Applications must raise an error if minLength and maxLength are both specified and minLength is greater than maxLength.
753
+ if key == :maxLength && object[:minLength]
754
+ errors << "#{type} has invalid property #{key}': both minLength and maxLength requires minLength be less than or equal to maxLength" if object[:minLength] > value
755
+ end
756
+
757
+ # Applications must raise an error if length, maxLength, or minLength are specified and the base datatype is not string or one of its subtypes, or a binary type.
758
+ unless %w(string normalizedString token language Name NMTOKEN hexBinary base64Binary binary).include?(self.base)
759
+ errors << "#{type} has invalid property '#{key}': only allowed on string or binary datatypes"
760
+ end
761
+ when :minimum, :maximum, :minInclusive, :maxInclusive, :minExclusive, :maxExclusive
762
+ case self.base
763
+ when 'decimal', 'integer', 'long', 'int', 'short', 'byte', 'double', 'number', 'float',
764
+ 'nonNegativeInteger', 'positiveInteger', 'unsignedLong', 'unsignedInt', 'unsignedShort', 'unsignedByte',
765
+ 'nonPositiveInteger', 'negativeInteger', 'date', 'dateTime', 'datetime', 'dateTimeStamp', 'time',
766
+ 'duration', 'dayTimeDuration', 'yearMonthDuration'
767
+ errors << "#{type} has invalid property '#{key}': #{value.to_ntriples} is not a valid #{self.base}" unless value.valid?
768
+
769
+ case key
770
+ when :minInclusive
771
+ # Applications MUST raise an error if both minInclusive and minExclusive are specified
772
+ errors << "#{type} cannot specify both minInclusive and minExclusive" if self.minExclusive
773
+
774
+ # Applications MUST raise an error if both minInclusive and maxInclusive are specified and maxInclusive is less than minInclusive
775
+ errors << "#{type} maxInclusive < minInclusive" if self.maxInclusive && self.maxInclusive < value
776
+
777
+ # Applications MUST raise an error if both minInclusive and maxExclusive are specified and maxExclusive is less than or equal to minInclusive
778
+ errors << "#{type} maxExclusive <= minInclusive" if self.maxExclusive && self.maxExclusive <= value
779
+ when :maxInclusive
780
+ # Applications MUST raise an error if both maxInclusive and maxExclusive are specified
781
+ errors << "#{type} cannot specify both maInclusive and maxExclusive" if self.maxExclusive
782
+ when :minExclusive
783
+ # Applications MUST raise an error if both minExclusive and maxExclusive are specified and maxExclusive is less than minExclusive
784
+ errors << "#{type} minExclusive < maxExclusive" if self.maxExclusive && self.maxExclusive < value
785
+
786
+ # Applications MUST raise an error if both minExclusive and maxInclusive are specified and maxInclusive is less than or equal to minExclusive
787
+ errors << "#{type} maxInclusive < minExclusive" if self.maxInclusive && self.maxInclusive <= value
788
+ end
789
+ else
790
+ errors << "#{type} has invalid property '#{key}': only allowed on numeric, date/time or duration datatypes"
791
+ end
611
792
  when :notes
612
793
  unless value.is_a?(Hash) || value.is_a?(Array)
613
794
  errors << "#{type} has invalid property '#{key}': #{value}, Object or Array"
@@ -617,7 +798,7 @@ module RDF::Tabular
617
798
  rescue Error => e
618
799
  errors << "#{type} has invalid content '#{key}': #{e.message}"
619
800
  end
620
- when :primaryKey
801
+ when :primaryKey, :rowTitles
621
802
  # A column reference property that holds either a single reference to a column description object or an array of references.
622
803
  "#{type} has invalid property '#{key}': no column references found" unless Array(value).length > 0
623
804
  Array(value).each do |k|
@@ -628,9 +809,18 @@ module RDF::Tabular
628
809
  when :@id
629
810
  # Must not be a BNode
630
811
  if value.to_s.start_with?("_:")
631
- errors << "#{type} has invalid property '#{key}': #{value.inspect}, must not start with '_:"
812
+ errors << "#{type} has invalid property '#{key}': #{value.inspect}, must not start with '_:'"
813
+ end
814
+
815
+ # Datatype @id MUST NOT be the URL of a built-in type
816
+ if self.is_a?(Datatype) && DATATYPES.values.include?(value)
817
+ errors << "#{type} has invalid property '#{key}': #{value.inspect}, must not be the URL of a built-in datatype"
632
818
  end
633
819
  when :@type
820
+ # Must not be a BNode
821
+ if value.to_s.start_with?("_:")
822
+ errors << "#{type} has invalid property '#{key}': #{value.inspect}, must not start with '_:'"
823
+ end
634
824
  unless value.to_sym == type
635
825
  errors << "#{type} has invalid property '#{key}': #{value.inspect}, expected #{type}"
636
826
  end
@@ -690,7 +880,9 @@ module RDF::Tabular
690
880
  next
691
881
  end
692
882
  number += 1
693
- yield(Row.new(data, self, number, number + skipped))
883
+ row = Row.new(data, self, number, number + skipped, @options)
884
+ (self.object[:rows] ||= []) << row if @options[:validate] # Keep track of rows when validating
885
+ yield(row)
694
886
  end
695
887
  end
696
888
 
@@ -775,18 +967,39 @@ module RDF::Tabular
775
967
  object.keys.any? {|k| k.to_s.include?(':')}
776
968
  end
777
969
 
970
+ # Does this metadata describe the file (URL)?
971
+ # @param [RDF::URL] url
972
+ # @return [Boolean]
973
+ def describes_file?(url)
974
+ case self
975
+ when TableGroup
976
+ tables.any? {|t| t.url == url}
977
+ else
978
+ self.url == url
979
+ end
980
+ end
981
+
778
982
  # Verify that the metadata we're using is compatible with embedded metadata
779
983
  # @param [Table] other
780
984
  # @raise [Error] if not compatible
781
985
  def verify_compatible!(other)
782
986
  if self.is_a?(TableGroup)
783
987
  unless tables.any? {|t| t.url == other.url && t.verify_compatible!(other)}
784
- raise Error, "TableGroups must have Table with matching url #{tables.map(&:url).inspect} vs #{other.url.inspect}"
988
+ if @options[:validate]
989
+ raise Error, "TableGroups must have Table with matching url #{tables.map(&:url).inspect} vs #{other.url.inspect}"
990
+ else
991
+ warn "TableGroups must have Table with matching url #{tables.map(&:url).inspect} vs #{other.url.inspect}"
992
+ end
785
993
  end
786
994
  else
787
995
  # Tables must have the same url
788
- raise Error, "Tables must have the same url: #{url.inspect} vs #{other.url.inspect}}" unless
789
- url == other.url
996
+ unless url == other.url
997
+ if @options[:validate]
998
+ raise Error, "Tables must have the same url: #{url.inspect} vs #{other.url.inspect}}"
999
+ else
1000
+ warn "Tables must have the same url: #{url.inspect} vs #{other.url.inspect}}"
1001
+ end
1002
+ end
790
1003
 
791
1004
  # Each column description within B MUST match the corresponding column description in A for non-virtual columns
792
1005
  non_virtual_columns = Array(tableSchema.columns).reject(&:virtual)
@@ -798,23 +1011,41 @@ module RDF::Tabular
798
1011
  index = 0
799
1012
  object_columns.all? do |cb|
800
1013
  ca = non_virtual_columns[index]
801
- va = ([ca[:name]] + case ca[:titles]
802
- when String then [ca[:titles]]
803
- when Array then ca[:titles]
804
- when Hash then ca[:titles].values.flatten
805
- else []
806
- end).compact.map(&:downcase)
807
-
808
- vb = ([cb[:name]] + case cb[:titles]
809
- when String then [cb[:titles]]
810
- when Array then cb[:titles]
811
- when Hash then cb[:titles].values.flatten
812
- else []
813
- end).compact.map(&:downcase)
814
-
815
- # If there's a non-empty case-insensitive intersection between the name and titles values for the column description at the same index within A and B, the column description in B is compatible with the matching column description in A
816
- raise Error, "Columns don't match: va: #{va}, vb: #{vb}" if (va & vb).empty?
817
- debug("merge!: columns") {"index: #{index}, va: #{va}, vb: #{vb}"}
1014
+ ta = ca.titles || {}
1015
+ tb = cb.titles || {}
1016
+ if !ca.object.has_key?(:name) && !cb.object.has_key?(:name) && ta.empty? && tb.empty?
1017
+ true
1018
+ elsif ca.object.has_key?(:name) && cb.object.has_key?(:name)
1019
+ raise Error, "Columns don't match: ca: #{ca.inspect}, cb: #{cb.inspect}" unless ca.name == cb.name
1020
+ elsif @options[:validate] || !ta.empty? && !tb.empty?
1021
+ # If validating, column compatibility requires strict match between titles
1022
+ titles_match = case
1023
+ when Array(ta['und']).any? {|t| tb.values.flatten.compact.include?(t)}
1024
+ true
1025
+ when Array(tb['und']).any? {|t| ta.values.flatten.compact.include?(t)}
1026
+ true
1027
+ when ta.any? {|lang, values| !(Array(tb[lang]) & Array(values)).empty?}
1028
+ # Match on title and language
1029
+ true
1030
+ else
1031
+ # Match if a language from ta is a prefix of a language from tb with matching titles
1032
+ ta.any? do |la, values|
1033
+ tb.keys.any? do |lb|
1034
+ (la.start_with?(lb) || lb.start_with?(la)) && !(Array(tb[lb]) & Array(values)).empty?
1035
+ end
1036
+ end
1037
+ end
1038
+
1039
+ if titles_match
1040
+ true
1041
+ elsif !@options[:validate]
1042
+ # If not validating, columns don't match, but processing continues
1043
+ warn "Columns don't match: ca: #{ca.inspect}, cb: #{cb.inspect}"
1044
+ true
1045
+ else
1046
+ raise Error, "Columns don't match: ca: #{ca.inspect}, cb: #{cb.inspect}"
1047
+ end
1048
+ end
818
1049
  index += 1
819
1050
  end
820
1051
  end
@@ -822,7 +1053,7 @@ module RDF::Tabular
822
1053
  end
823
1054
 
824
1055
  def inspect
825
- self.class.name + object.inspect
1056
+ self.class.name + (respond_to?(:to_atd) ? to_atd : object).inspect
826
1057
  end
827
1058
 
828
1059
  # Proxy to @object
@@ -830,7 +1061,7 @@ module RDF::Tabular
830
1061
  def []=(key, value); object[key] = value; end
831
1062
  def each(&block); object.each(&block); end
832
1063
  def ==(other)
833
- object == (other.is_a?(Hash) ? other : other.object)
1064
+ object == (other.is_a?(Hash) ? other : (other.respond_to?(:object) ? other.object : other))
834
1065
  end
835
1066
  def to_json(args=nil); object.to_json(args); end
836
1067
 
@@ -845,8 +1076,6 @@ module RDF::Tabular
845
1076
  normalize_jsonld(key, value)
846
1077
  when ->(k) {key.to_s == '@context'}
847
1078
  "http://www.w3.org/ns/csvw"
848
- when :link
849
- base.join(value).to_s
850
1079
  when :array
851
1080
  value = [value] unless value.is_a?(Array)
852
1081
  value.map do |v|
@@ -854,13 +1083,15 @@ module RDF::Tabular
854
1083
  v.normalize!
855
1084
  elsif v.is_a?(Hash) && (ref = v["reference"]).is_a?(Hash)
856
1085
  # SPEC SUGGESTION: special case for foreignKeys
857
- ref["resource"] = base.join(ref["resource"]).to_s if ref["resource"]
858
- ref["schemaReference"] = base.join(ref["schemaReference"]).to_s if ref["schemaReference"]
1086
+ ref["resource"] = context.base.join(ref["resource"]).to_s if ref["resource"]
1087
+ ref["schemaReference"] = context.base.join(ref["schemaReference"]).to_s if ref["schemaReference"]
859
1088
  v
860
1089
  else
861
1090
  v
862
1091
  end
863
1092
  end
1093
+ when :link
1094
+ context.base.join(value).to_s
864
1095
  when :object
865
1096
  case value
866
1097
  when Metadata then value.normalize!
@@ -872,6 +1103,14 @@ module RDF::Tabular
872
1103
  end
873
1104
  when :natural_language
874
1105
  value.is_a?(Hash) ? value : {(context.default_language || 'und') => Array(value)}
1106
+ when :atomic
1107
+ case key
1108
+ when :minimum, :maximum, :minInclusive, :maxInclusive, :minExclusive, :maxExclusive
1109
+ # Convert to a typed literal based on `base`. This will be validated later
1110
+ RDF::Literal(value, datatype: DATATYPES[self.base.to_sym])
1111
+ else
1112
+ value
1113
+ end
875
1114
  else
876
1115
  value
877
1116
  end
@@ -901,10 +1140,10 @@ module RDF::Tabular
901
1140
  raise Error, "Value object may not contain keys other than @value, @type, or @language: #{value.to_json}"
902
1141
  elsif (value.keys.sort & %w(@language @type)) == %w(@language @type)
903
1142
  raise Error, "Value object may not contain both @type and @language: #{value.to_json}"
904
- elsif value['@language'] && !BCP47::Language.identify(value['@language'])
905
- warn "Value object with @language must use valid language: #{value.to_json}" if @warnings
1143
+ elsif value['@language'] && !BCP47::Language.identify(value['@language'].to_s)
1144
+ warn "Value object with @language must use valid language: #{value.to_json}"
906
1145
  value.delete('@language')
907
- elsif value['@type'] && !context.expand_iri(value['@type'], vocab: true).absolute?
1146
+ elsif value['@type'] && (value['@type'].start_with?('_:') || !context.expand_iri(value['@type'], vocab: true).absolute?)
908
1147
  raise Error, "Value object with @type must defined type: #{value.to_json}"
909
1148
  end
910
1149
  value
@@ -919,7 +1158,7 @@ module RDF::Tabular
919
1158
  Array(v).each do |vv|
920
1159
  # Validate that all type values transform to absolute IRIs
921
1160
  resource = context.expand_iri(vv, vocab: true)
922
- raise Error, "Invalid type #{vv} in JSON-LD context" unless resource.uri? && resource.absolute?
1161
+ raise Error, "Invalid type #{vv} in JSON-LD context" unless resource.is_a?(RDF::URI) && resource.absolute?
923
1162
  end
924
1163
  nv[k] = v
925
1164
  when /^(@|_:)/
@@ -981,13 +1220,6 @@ module RDF::Tabular
981
1220
  end
982
1221
  end
983
1222
 
984
- def inherited_property_value(method)
985
- # Inherited properties
986
- object.fetch(method.to_sym) do
987
- parent.send(method) if parent
988
- end
989
- end
990
-
991
1223
  def default_value(prop)
992
1224
  self.class.const_get(:DEFAULTS).merge(INHERITED_DEFAULTS)[prop]
993
1225
  end
@@ -1033,24 +1265,30 @@ module RDF::Tabular
1033
1265
  transformations: :array,
1034
1266
  }.freeze
1035
1267
  DEFAULTS = {
1036
- tableDirection: "default".freeze,
1268
+ tableDirection: "auto".freeze,
1037
1269
  }.freeze
1038
1270
  REQUIRED = [:tables].freeze
1039
1271
 
1040
- # Setters
1272
+ # Getters and Setters
1041
1273
  PROPERTIES.each do |key, type|
1042
- next if [:tables, :tableSchema, :dialect, :transformations].include?(key)
1274
+ next if [:dialect].include?(key)
1275
+
1276
+ define_method(key) do
1277
+ object.fetch(key, DEFAULTS[key])
1278
+ end
1279
+
1280
+ next if [:tables, :tableSchema, :transformations].include?(key)
1043
1281
  define_method("#{key}=".to_sym) do |value|
1044
1282
  invalid = case key
1045
1283
  when :tableDirection
1046
- "rtl, ltr, or default" unless %(rtl ltr default).include?(value)
1284
+ "rtl, ltr, or auto" unless %(rtl ltr auto).include?(value)
1047
1285
  when :notes, :tables, :tableSchema, :dialect, :transformations
1048
1286
  # We handle this through a separate setters
1049
1287
  end
1050
1288
 
1051
1289
  if invalid
1052
1290
  warn "#{type} has invalid property '#{key}' (#{value.inspect}): expected #{invalid}"
1053
- object[key] = default_value(key) unless default_value(key).nil?
1291
+ object.delete(key)
1054
1292
  else
1055
1293
  object[key] = value
1056
1294
  end
@@ -1063,15 +1301,6 @@ module RDF::Tabular
1063
1301
  super || tables.any? {|t| t.has_annotations? }
1064
1302
  end
1065
1303
 
1066
- # Logic for accessing elements as accessors
1067
- def method_missing(method, *args)
1068
- if INHERITED_PROPERTIES.has_key?(method.to_sym)
1069
- inherited_property_value(method.to_sym)
1070
- else
1071
- PROPERTIES.has_key?(method.to_sym) ? object[method.to_sym] : super
1072
- end
1073
- end
1074
-
1075
1304
  ##
1076
1305
  # Iterate over all tables
1077
1306
  # @yield [Table]
@@ -1102,7 +1331,7 @@ module RDF::Tabular
1102
1331
  object.inject({
1103
1332
  "@id" => (id.to_s if id),
1104
1333
  "@type" => "AnnotatedTableGroup",
1105
- "tables" => []
1334
+ "tables" => Array(self.tables).map(&:to_atd)
1106
1335
  }) do |memo, (k, v)|
1107
1336
  memo[k.to_s] ||= v
1108
1337
  memo
@@ -1124,32 +1353,37 @@ module RDF::Tabular
1124
1353
  }.freeze
1125
1354
  DEFAULTS = {
1126
1355
  suppressOutput: false,
1127
- tableDirection: "default".freeze,
1356
+ tableDirection: "auto".freeze,
1128
1357
  }.freeze
1129
1358
  REQUIRED = [:url].freeze
1130
1359
 
1131
- # Setters
1360
+ # Getters and Setters
1132
1361
  PROPERTIES.each do |key, type|
1133
- next if [:tableSchema, :dialect, :transformations].include?(key)
1362
+ next if [:dialect, :url].include?(key)
1363
+ define_method(key) do
1364
+ object.fetch(key, DEFAULTS[key])
1365
+ end
1366
+
1367
+ next if [:tableSchema, :transformations].include?(key)
1134
1368
  define_method("#{key}=".to_sym) do |value|
1135
1369
  invalid = case key
1136
1370
  when :suppressOutput
1137
1371
  "boolean true or false" unless value.is_a?(TrueClass) || value.is_a?(FalseClass)
1138
1372
  when :tableDirection
1139
- "rtl, ltr, or default" unless %(rtl ltr default).include?(value)
1373
+ "rtl, ltr, or auto" unless %(rtl ltr auto).include?(value)
1140
1374
  when :url
1141
- "valid URL" unless value.is_a?(String) && base.join(value).valid?
1375
+ "valid URL" unless value.is_a?(String) && context.base.join(value).valid?
1142
1376
  when :notes, :tableSchema, :dialect, :transformations
1143
1377
  # We handle this through a separate setters
1144
1378
  end
1145
1379
 
1146
1380
  if invalid
1147
1381
  warn "#{type} has invalid property '#{key}' (#{value.inspect}): expected #{invalid}"
1148
- object[key] = default_value(key) unless default_value(key).nil?
1382
+ object.delete(key)
1149
1383
  elsif key == :url
1150
1384
  # URL of CSV relative to metadata
1151
1385
  object[:url] = value
1152
- @url = base.join(value)
1386
+ @url = context.base.join(value)
1153
1387
  @context.base = @url if @context # Use as base for expanding IRIs
1154
1388
  else
1155
1389
  object[key] = value
@@ -1180,22 +1414,13 @@ module RDF::Tabular
1180
1414
  "@id" => (id.to_s if id),
1181
1415
  "@type" => "AnnotatedTable",
1182
1416
  "url" => self.url.to_s,
1183
- "columns" => tableSchema.columns.map(&:to_atd),
1417
+ "columns" => Array(tableSchema ? tableSchema.columns : []).map(&:to_atd),
1184
1418
  "rows" => []
1185
1419
  }) do |memo, (k, v)|
1186
1420
  memo[k.to_s] ||= v
1187
1421
  memo
1188
1422
  end.delete_if {|k,v| v.nil? || v.is_a?(Metadata) || k.to_s == "@context"}
1189
1423
  end
1190
-
1191
- # Logic for accessing elements as accessors
1192
- def method_missing(method, *args)
1193
- if INHERITED_PROPERTIES.has_key?(method.to_sym)
1194
- inherited_property_value(method.to_sym)
1195
- else
1196
- PROPERTIES.has_key?(method.to_sym) ? object[method.to_sym] : super
1197
- end
1198
- end
1199
1424
  end
1200
1425
 
1201
1426
  class Schema < Metadata
@@ -1205,21 +1430,26 @@ module RDF::Tabular
1205
1430
  columns: :array,
1206
1431
  foreignKeys: :array,
1207
1432
  primaryKey: :column_reference,
1433
+ rowTitles: :column_reference,
1208
1434
  }.freeze
1209
1435
  DEFAULTS = {}.freeze
1210
1436
  REQUIRED = [].freeze
1211
1437
 
1212
- # Setters
1438
+ # Getters and Setters
1213
1439
  PROPERTIES.each do |key, type|
1440
+ define_method(key) do
1441
+ object.fetch(key, DEFAULTS[key])
1442
+ end
1443
+
1214
1444
  define_method("#{key}=".to_sym) do |value|
1215
1445
  invalid = case key
1216
- when :primaryKey
1446
+ when :primaryKey, :rowTitles
1217
1447
  "string or array of strings" unless !value.is_a?(Hash) && Array(value).all? {|v| v.is_a?(String)}
1218
1448
  end
1219
1449
 
1220
1450
  if invalid
1221
1451
  warn "#{type} has invalid property '#{key}' (#{value.inspect}): expected #{invalid}"
1222
- object[key] = default_value(key) unless default_value(key).nil?
1452
+ object.delete(key)
1223
1453
  else
1224
1454
  object[key] = value
1225
1455
  end
@@ -1270,12 +1500,21 @@ module RDF::Tabular
1270
1500
  end
1271
1501
  end
1272
1502
 
1273
- # Logic for accessing elements as accessors
1274
- def method_missing(method, *args)
1275
- if INHERITED_PROPERTIES.has_key?(method.to_sym)
1276
- inherited_property_value(method.to_sym)
1277
- else
1278
- PROPERTIES.has_key?(method.to_sym) ? object[method.to_sym] : super
1503
+ ##
1504
+ # List of foreign keys referencing the specified table
1505
+ #
1506
+ # @param [Table] table
1507
+ # @return [Array<Hash>]
1508
+ def foreign_keys_referencing(table)
1509
+ Array(foreignKeys).select do |fk|
1510
+ reference = fk['reference']
1511
+ if reference['resource']
1512
+ ref = context.base.join(reference['resource']).to_s
1513
+ table.url == ref
1514
+ else # schemaReference
1515
+ ref = context.base.join(reference['schemaReference']).to_s
1516
+ table.tableSchema.id == ref
1517
+ end
1279
1518
  end
1280
1519
  end
1281
1520
  end
@@ -1321,8 +1560,12 @@ module RDF::Tabular
1321
1560
  super || columns.any? {|c| c.has_annotations? }
1322
1561
  end
1323
1562
 
1324
- # Setters
1563
+ # Getters and Setters
1325
1564
  PROPERTIES.each do |key, t|
1565
+ define_method(key) do
1566
+ object.fetch(key, DEFAULTS[key])
1567
+ end
1568
+
1326
1569
  define_method("#{key}=".to_sym) do |value|
1327
1570
  invalid = case key
1328
1571
  when :name
@@ -1339,7 +1582,7 @@ module RDF::Tabular
1339
1582
  object.delete(key) if object[key].nil?
1340
1583
  elsif invalid
1341
1584
  warn "#{type} has invalid property '#{key}' (#{value.inspect}): expected #{invalid}"
1342
- object[key] = default_value(key) unless default_value(key).nil?
1585
+ object.delete(key)
1343
1586
  else
1344
1587
  object[key] = value
1345
1588
  end
@@ -1360,7 +1603,7 @@ module RDF::Tabular
1360
1603
  # @return [RDF::URI]
1361
1604
  def id;
1362
1605
  url = table ? table.url : RDF::URI("")
1363
- url + "#col=#{self.sourceNumber}";
1606
+ url.to_s + "#col=#{self.sourceNumber}";
1364
1607
  end
1365
1608
 
1366
1609
  # Return Annotated Column representation
@@ -1380,15 +1623,6 @@ module RDF::Tabular
1380
1623
  memo
1381
1624
  end.delete_if {|k,v| v.nil?}
1382
1625
  end
1383
-
1384
- # Logic for accessing elements as accessors
1385
- def method_missing(method, *args)
1386
- if INHERITED_PROPERTIES.has_key?(method.to_sym)
1387
- inherited_property_value(method.to_sym)
1388
- else
1389
- PROPERTIES.has_key?(method.to_sym) ? object[method.to_sym] : super
1390
- end
1391
- end
1392
1626
  end
1393
1627
 
1394
1628
  class Transformation < Metadata
@@ -1404,8 +1638,13 @@ module RDF::Tabular
1404
1638
  DEFAULTS = {}.freeze
1405
1639
  REQUIRED = %w(url targetFormat scriptFormat).map(&:to_sym).freeze
1406
1640
 
1407
- # Setters
1641
+ # Getters and Setters
1408
1642
  PROPERTIES.each do |key, type|
1643
+ next if [:url].include?(key)
1644
+ define_method(key) do
1645
+ object.fetch(key, DEFAULTS[key])
1646
+ end
1647
+
1409
1648
  define_method("#{key}=".to_sym) do |value|
1410
1649
  invalid = case key
1411
1650
  when :scriptFormat, :targetFormat
@@ -1416,17 +1655,12 @@ module RDF::Tabular
1416
1655
 
1417
1656
  if invalid
1418
1657
  warn "#{type} has invalid property '#{key}' (#{value.inspect}): expected #{invalid}"
1419
- object[key] = default_value(key) unless default_value(key).nil?
1658
+ object.delete(key)
1420
1659
  else
1421
1660
  object[key] = value
1422
1661
  end
1423
1662
  end
1424
1663
  end
1425
-
1426
- # Logic for accessing elements as accessors
1427
- def method_missing(method, *args)
1428
- PROPERTIES.has_key?(method.to_sym) ? object[method.to_sym] : super
1429
- end
1430
1664
  end
1431
1665
 
1432
1666
  class Dialect < Metadata
@@ -1444,7 +1678,7 @@ module RDF::Tabular
1444
1678
  skipColumns: 0,
1445
1679
  skipInitialSpace: false,
1446
1680
  skipRows: 0,
1447
- trim: false
1681
+ trim: true
1448
1682
  }.freeze
1449
1683
 
1450
1684
  PROPERTIES = {
@@ -1467,13 +1701,15 @@ module RDF::Tabular
1467
1701
 
1468
1702
  REQUIRED = [].freeze
1469
1703
 
1470
- # Setters
1704
+ # Getters and Setters
1471
1705
  PROPERTIES.keys.each do |key|
1706
+ define_method(key) do
1707
+ object.fetch(key, DEFAULTS[key])
1708
+ end
1709
+
1472
1710
  define_method("#{key}=".to_sym) do |value|
1473
1711
  invalid = case key
1474
- when :commentPrefix, :delimiter, :quoteChar
1475
- "a single character string" unless value.is_a?(String) && value.length == 1
1476
- when :lineTerminators
1712
+ when :commentPrefix, :delimiter, :quoteChar, :lineTerminators
1477
1713
  "a string" unless value.is_a?(String)
1478
1714
  when :doubleQuote, :header, :skipInitialSpace, :skipBlankRows
1479
1715
  "boolean true or false" unless value.is_a?(TrueClass) || value.is_a?(FalseClass)
@@ -1493,7 +1729,7 @@ module RDF::Tabular
1493
1729
  object.delete(key) if object[key].nil?
1494
1730
  elsif invalid
1495
1731
  warn "#{type} has invalid property '#{key}' (#{value.inspect}): expected #{invalid}"
1496
- object[key] = default_value(key) unless default_value(key).nil?
1732
+ object.delete(key)
1497
1733
  else
1498
1734
  object[key] = value
1499
1735
  end
@@ -1515,7 +1751,7 @@ module RDF::Tabular
1515
1751
  # default for trim comes from skipInitialSpace
1516
1752
  # @return [Boolean, String]
1517
1753
  def trim
1518
- object.fetch(:trim, self.skipInitialSpace ? 'start' : false)
1754
+ object.fetch(:trim, self.skipInitialSpace ? 'start' : true)
1519
1755
  end
1520
1756
 
1521
1757
  ##
@@ -1546,7 +1782,8 @@ module RDF::Tabular
1546
1782
  }
1547
1783
  }
1548
1784
  metadata ||= table # In case the embedded metadata becomes the final metadata
1549
- metadata["lang"] = options[:lang] if options[:lang]
1785
+ lang = metadata["lang"] = options[:lang] if options[:lang]
1786
+ lang ||= 'und'
1550
1787
 
1551
1788
  # Set encoding on input
1552
1789
  csv = ::CSV.new(input, csv_options)
@@ -1575,9 +1812,9 @@ module RDF::Tabular
1575
1812
  # Initialize titles
1576
1813
  columns = table["tableSchema"]["columns"] ||= []
1577
1814
  column = columns[index - skipCols] ||= {
1578
- "titles" => {"und" => []},
1815
+ "titles" => {lang => []},
1579
1816
  }
1580
- column["titles"]["und"] << value
1817
+ column["titles"][lang] << value
1581
1818
  end
1582
1819
  end
1583
1820
  debug("embedded_metadata") {"table: #{table.inspect}"}
@@ -1585,20 +1822,12 @@ module RDF::Tabular
1585
1822
 
1586
1823
  Table.new(table, options.merge(reason: "load embedded metadata: #{table['@id']}"))
1587
1824
  end
1588
-
1589
- # Logic for accessing elements as accessors
1590
- def method_missing(method, *args)
1591
- if DEFAULTS.has_key?(method.to_sym)
1592
- # As set, or with default
1593
- object.fetch(method.to_sym, DEFAULTS[method.to_sym])
1594
- else
1595
- super
1596
- end
1597
- end
1598
1825
  end
1599
1826
 
1600
1827
  class Datatype < Metadata
1601
1828
  PROPERTIES = {
1829
+ :@id => :link,
1830
+ :@type => :atomic,
1602
1831
  base: :atomic,
1603
1832
  format: :atomic,
1604
1833
  length: :atomic,
@@ -1612,50 +1841,248 @@ module RDF::Tabular
1612
1841
  maxExclusive: :atomic,
1613
1842
  }.freeze
1614
1843
  REQUIRED = [].freeze
1615
- DEFAULTS = {}.freeze
1844
+ DEFAULTS = {
1845
+ base: "string"
1846
+ }.freeze
1616
1847
 
1617
1848
  # Override `base` in Metadata
1618
1849
  def base; object[:base]; end
1619
1850
 
1620
- # Setters
1851
+ # Getters and Setters
1621
1852
  PROPERTIES.each do |key, type|
1853
+ define_method(key) do
1854
+ object.fetch(key, DEFAULTS[key])
1855
+ end
1856
+
1622
1857
  define_method("#{key}=".to_sym) do |value|
1623
1858
  invalid = case key
1859
+ when :base
1860
+ "built-in datatype" unless DATATYPES.keys.map(&:to_s).include?(value)
1624
1861
  when :minimum, :maximum, :minInclusive, :maxInclusive, :minExclusive, :maxExclusive
1625
1862
  "numeric or valid date/time" unless value.is_a?(Numeric) ||
1626
1863
  RDF::Literal::Date.new(value.to_s).valid? ||
1627
1864
  RDF::Literal::Time.new(value.to_s).valid? ||
1628
1865
  RDF::Literal::DateTime.new(value.to_s).valid?
1629
1866
  when :format
1630
- unless value.is_a?(String)
1631
- warn "#{type} has invalid property '#{key}': #{value.inspect}, expected a string"
1632
- if default_value(key).nil?
1633
- object.delete(key)
1634
- else
1635
- object[key] = default_value(key)
1867
+ case value
1868
+ when String
1869
+ nil
1870
+ when Hash
1871
+ unless (value.keys.map(&:to_s) - %w(groupChar decimalChar pattern)).empty?
1872
+ "an object containing only groupChar, decimalChar, and/or pattern"
1636
1873
  end
1874
+ else
1875
+ "a string or object"
1637
1876
  end
1638
1877
  when :length, :minLength, :maxLength
1639
1878
  if !(value.is_a?(Numeric) && value.integer? && value >= 0)
1640
1879
  "a non-negative integer"
1641
- elsif key != :length && object[:length] && value != object[:length]
1642
- # Applications must raise an error if length, maxLength or minLength are specified and the cell value is not a list (ie separator is not specified), a string or one of its subtypes, or a binary value.
1643
- "both length and #{key} requires they be equal"
1644
1880
  end
1645
1881
  end
1646
1882
 
1647
1883
  if invalid
1648
- warn "#{type} has invalid property '#{key}' (#{value.inspect}): expected #{invalid}"
1649
- object[key] = default_value(key) unless default_value(key).nil?
1884
+ warn "#{self.type} has invalid property '#{key}' (#{value.inspect}): expected #{invalid}"
1885
+ object.delete(key)
1650
1886
  else
1651
1887
  object[key] = value
1652
1888
  end
1653
1889
  end
1654
1890
  end
1655
1891
 
1656
- # Logic for accessing elements as accessors
1657
- def method_missing(method, *args)
1658
- PROPERTIES.has_key?(method.to_sym) ? object[method.to_sym] : super
1892
##
# Match a value against a UAX35-style date/time format and normalize it to
# the corresponding XML Schema lexical form (`yyyy-MM-dd`, `HH:mm:ss[.S+]`,
# or both joined by `T`, followed by any timezone text).
#
# @param [String] format
#   date and/or time format, optionally ending in a timezone marker
#   (`x`/`X`, 1-5 times, optionally preceded by whitespace). When `nil`,
#   the value is returned untouched.
# @param [String] value cell value to match; `nil` is treated as ""
# @return [String, nil]
#   normalized value, or `nil` if the value does not match the format
# @raise [ArgumentError] if the date or time portion of format is not recognized
def parse_uax35_date(format, value)
  return value unless format
  value ||= ""
  tz = nil

  # Split a trailing timezone marker off the format
  if (md = format.match(/^(.*[dyms])+(\s*[xX]{1,5})$/))
    format, tz = md[1], md[2]
  end

  date_format, time_format = format.split(' ')
  # A lone format on a `time` datatype describes the time, not the date
  date_format, time_format = nil, date_format if base.to_sym == :time

  # Extract date, if specified. Patterns are anchored with \A (not ^) because
  # the code below skips `match.length` characters from the start of value;
  # a match starting on a later line would otherwise be accepted and mis-sliced.
  date_part = case date_format
  when 'yyyy-MM-dd' then value.match(/\A(?<yr>\d{4})-(?<mo>\d{2})-(?<da>\d{2})/)
  when 'yyyyMMdd'   then value.match(/\A(?<yr>\d{4})(?<mo>\d{2})(?<da>\d{2})/)
  when 'dd-MM-yyyy' then value.match(/\A(?<da>\d{2})-(?<mo>\d{2})-(?<yr>\d{4})/)
  when 'd-M-yyyy'   then value.match(/\A(?<da>\d{1,2})-(?<mo>\d{1,2})-(?<yr>\d{4})/)
  when 'MM-dd-yyyy' then value.match(/\A(?<mo>\d{2})-(?<da>\d{2})-(?<yr>\d{4})/)
  when 'M-d-yyyy'   then value.match(/\A(?<mo>\d{1,2})-(?<da>\d{1,2})-(?<yr>\d{4})/)
  when 'dd/MM/yyyy' then value.match(/\A(?<da>\d{2})\/(?<mo>\d{2})\/(?<yr>\d{4})/)
  when 'd/M/yyyy'   then value.match(/\A(?<da>\d{1,2})\/(?<mo>\d{1,2})\/(?<yr>\d{4})/)
  when 'MM/dd/yyyy' then value.match(/\A(?<mo>\d{2})\/(?<da>\d{2})\/(?<yr>\d{4})/)
  when 'M/d/yyyy'   then value.match(/\A(?<mo>\d{1,2})\/(?<da>\d{1,2})\/(?<yr>\d{4})/)
  when 'dd.MM.yyyy' then value.match(/\A(?<da>\d{2})\.(?<mo>\d{2})\.(?<yr>\d{4})/)
  when 'd.M.yyyy'   then value.match(/\A(?<da>\d{1,2})\.(?<mo>\d{1,2})\.(?<yr>\d{4})/)
  when 'MM.dd.yyyy' then value.match(/\A(?<mo>\d{2})\.(?<da>\d{2})\.(?<yr>\d{4})/)
  when 'M.d.yyyy'   then value.match(/\A(?<mo>\d{1,2})\.(?<da>\d{1,2})\.(?<yr>\d{4})/)
  when 'yyyy-MM-ddTHH:mm' then value.match(/\A(?<yr>\d{4})-(?<mo>\d{2})-(?<da>\d{2})T(?<hr>\d{2}):(?<mi>\d{2})(?<se>(?<ms>))/)
  when 'yyyy-MM-ddTHH:mm:ss' then value.match(/\A(?<yr>\d{4})-(?<mo>\d{2})-(?<da>\d{2})T(?<hr>\d{2}):(?<mi>\d{2}):(?<se>\d{2})(?<ms>)/)
  when /yyyy-MM-ddTHH:mm:ss\.S+/
    md = value.match(/\A(?<yr>\d{4})-(?<mo>\d{2})-(?<da>\d{2})T(?<hr>\d{2}):(?<mi>\d{2}):(?<se>\d{2})\.(?<ms>\d+)/)
    # The number of S's in the format bounds the fractional digits allowed
    num_ms = date_format.match(/S+/).to_s.length
    md if md && md[:ms].length <= num_ms
  else
    raise ArgumentError, "unrecognized date/time format #{date_format}" if date_format
    nil
  end

  # Forward past date part, and past the whitespace separating it from the time
  if date_part
    value = value[date_part.to_s.length..-1]
    value = value.lstrip if value.start_with?(' ')
  end

  # Extract time, if specified
  time_part = case time_format
  when 'HH:mm:ss' then value.match(/\A(?<hr>\d{2}):(?<mi>\d{2}):(?<se>\d{2})(?<ms>)/)
  when 'HHmmss'   then value.match(/\A(?<hr>\d{2})(?<mi>\d{2})(?<se>\d{2})(?<ms>)/)
  when 'HH:mm'    then value.match(/\A(?<hr>\d{2}):(?<mi>\d{2})(?<se>)(?<ms>)/)
  when 'HHmm'     then value.match(/\A(?<hr>\d{2})(?<mi>\d{2})(?<se>)(?<ms>)/)
  when /HH:mm:ss\.S+/
    md = value.match(/\A(?<hr>\d{2}):(?<mi>\d{2}):(?<se>\d{2})\.(?<ms>\d+)/)
    num_ms = time_format.match(/S+/).to_s.length
    md if md && md[:ms].length <= num_ms
  else
    raise ArgumentError, "unrecognized date/time format #{time_format}" if time_format
    nil
  end

  # If there's a date_format but no date_part, match fails
  return nil if date_format && date_part.nil?

  # If there's a time_format but no time_part, match fails
  return nil if time_format && time_part.nil?

  # Forward past time part
  value = value[time_part.to_s.length..-1] if time_part

  # A combined date-time format captured the time fields in the date match
  time_part = date_part if date_part && date_part.names.include?("hr")

  # If there's a timezone, it may optionally start with whitespace;
  # whatever remains of the value is taken as the timezone
  value = value.lstrip if tz.to_s.start_with?(' ')
  tz_part = value if tz

  # Compose normalized value
  vd = ("%04d-%02d-%02d" % [date_part[:yr].to_i, date_part[:mo].to_i, date_part[:da].to_i]) if date_part
  vt = ("%02d:%02d:%02d" % [time_part[:hr].to_i, time_part[:mi].to_i, time_part[:se].to_i]) if time_part

  # Add fractional seconds, if matched
  vt += ".#{time_part[:ms]}" if time_part && !time_part[:ms].empty?

  [vd, vt].compact.join('T') + tz_part.to_s
end
1987
+
1988
##
# Match a value against a UAX35-style number pattern and return it in a
# normalized form: upper-cased, with whitespace and grouping characters
# removed and the decimal character replaced by ".".
#
# @param [String] pattern
#   number pattern; when `nil` or empty the value is returned untouched
# @param [String] value cell value to match; `nil` is treated as ""
# @param [String] groupChar grouping character, "," when omitted or `nil`
# @param [String] decimalChar decimal character, "." when omitted or `nil`
# @return [String, nil]
#   normalized value, or `nil` if the value does not match the pattern
# @raise [ArgumentError] if pattern is not valid (raised by {#build_number_re})
def parse_uax35_number(pattern, value, groupChar=",", decimalChar=".")
  return value if pattern.to_s.empty?

  # An explicit nil (e.g. an unset key of a format object) falls back to
  # the default rather than failing in gsub below
  groupChar ||= ","
  decimalChar ||= "."

  # Built first so that an invalid pattern is reported regardless of value
  re = build_number_re(pattern, groupChar, decimalChar)

  # Upcase value and remove internal spaces, drop grouping characters,
  # then canonicalize the decimal character
  normalized = value.to_s.upcase.gsub(/\s+/, '')
  normalized = normalized.gsub(groupChar, '')
  normalized = normalized.gsub(decimalChar, '.')

  normalized =~ re ? normalized : nil
end
2021
+
2022
# Build a regular expression from the provided number pattern. The result
# matches a value that has already had grouping characters removed and
# its decimal character replaced by ".".
#
# @param [String] pattern
#   composed of only 0, #, decimalChar, groupChar, E, +, -, %, and ‰
# @param [String] groupChar
# @param [String] decimalChar
# @return [Regexp] Regular expression matching the whole normalized value
# @raise [ArgumentError] if pattern is not valid
def build_number_re(pattern, groupChar, decimalChar)
  # Escape the separators so that any character (not just ".") is matched
  # literally when interpolated into the validation expression
  group_re = Regexp.escape(groupChar)
  decimal_re = Regexp.escape(decimalChar)

  legal_number_pattern = /\A
    ([%‰])?
    ([+-])?
    # Mantissa
    (\#|#{group_re})*
    (0|#{group_re})*
    # Fractional
    (?:#{decimal_re}
      (0|#{group_re})*
      (\#|#{group_re})*
      # Exponent
      (E
        [+-]?
        (?:\#|#{group_re})*
        (?:0|#{group_re})*
      )?
    )?
    ([%‰])?
  \z/x

  unless pattern =~ legal_number_pattern
    raise ArgumentError, "unrecognized number pattern #{pattern}"
  end

  # Remove groupChar from pattern, then replace decimalChar with "."
  normalized = pattern.gsub(groupChar, '').gsub(decimalChar, '.')

  # Split on "." and E into mantissa, fractional and exponent parts
  mantissa, fractional, exponent = normalized.split(/[.E]/)

  # In each part, 0 is a required digit and # an optional one. A sign taken
  # from the pattern is escaped so "+" is a literal, not a quantifier.
  mantissa_str = case mantissa
  when /\A([%‰])?([+-])?#+(0+)([%‰])?\z/
    m = Regexp.last_match
    "#{m[1]}#{Regexp.escape(m[2].to_s)}\\d{#{m[3].length},}#{m[4]}"
  when /\A([%‰])?([+-])?(0+)([%‰])?\z/
    m = Regexp.last_match
    "#{m[1]}#{Regexp.escape(m[2].to_s)}\\d{#{m[3].length}}#{m[4]}"
  when /\A([%‰])?([+-])?#+([%‰])?\z/
    m = Regexp.last_match
    # The trailing percent/per-mille sign is the third group here
    "#{m[1]}#{Regexp.escape(m[2].to_s)}\\d*#{m[3]}"
  end

  fractional_str = case fractional
  when /\A(0+)(#+)([%‰])?\z/
    m = Regexp.last_match
    "\\d{#{m[1].length},#{m[1].length + m[2].length}}#{m[3]}"
  when /\A(0+)([%‰])?\z/
    m = Regexp.last_match
    "\\d{#{m[1].length}}#{m[2]}"
  when /\A(#+)([%‰])?\z/
    m = Regexp.last_match
    "\\d{0,#{m[1].length}}#{m[2]}"
  end
  fractional_str = "\\.#{fractional_str}" if fractional_str

  exponent_str = case exponent
  when /\A([+-])?(#+)(0+)([%‰])?\z/
    m = Regexp.last_match
    "#{Regexp.escape(m[1].to_s)}\\d{#{m[3].length},#{m[2].length + m[3].length}}#{m[4]}"
  when /\A([+-])?(0+)([%‰])?\z/
    m = Regexp.last_match
    "#{Regexp.escape(m[1].to_s)}\\d{#{m[2].length}}#{m[3]}"
  when /\A([+-])?(#+)([%‰])?\z/
    m = Regexp.last_match
    "#{Regexp.escape(m[1].to_s)}\\d{0,#{m[2].length}}#{m[3]}"
  end
  exponent_str = "E#{exponent_str}" if exponent_str

  # \A and \z anchor the whole string; ^ and $ would accept multi-line input
  Regexp.new("\\A#{mantissa_str}#{fractional_str}#{exponent_str}\\z")
end
1660
2087
  end
1661
2088
 
@@ -1697,6 +2124,10 @@ module RDF::Tabular
1697
2124
  "errors" => self.errors
1698
2125
  }.delete_if {|k,v| Array(v).empty?}
1699
2126
  end
2127
+
2128
# Debugging representation: the class name followed by the inspected
# result of #to_atd. Interpolation is used so that an anonymous class
# (whose name is nil) does not make inspect itself raise.
def inspect
  "#{self.class.name}#{to_atd.inspect}"
end
1700
2131
  end
1701
2132
 
1702
2133
  # Row values, hashed by `name`
@@ -1715,6 +2146,16 @@ module RDF::Tabular
1715
2146
  # @return [Table]
1716
2147
  attr_reader :table
1717
2148
 
2149
+ #
2150
+ # Cells providing a unique row identifier
2151
+ # @return [Array<Cell>]
2152
+ attr_reader :primaryKey
2153
+
2154
+ #
2155
+ # Title(s) of this row
2156
+ # @return [Array<RDF::Literal>]
2157
+ attr_reader :titles
2158
+
1718
2159
  #
1719
2160
  # Context from Table with base set to table URL for expanding URI Templates
1720
2161
  # @return [JSON::LD::Context]
@@ -1725,8 +2166,10 @@ module RDF::Tabular
1725
2166
  # @param [Metadata] metadata for Table
1726
2167
  # @param [Integer] number 1-based row number after skipped/header rows
1727
2168
  # @param [Integer] source_number 1-based row number from source
2169
+ # @param [Hash{Symbol => Object}] options ({})
2170
+ # @option options [Boolean] :validate check for PK/FK consistency
1728
2171
  # @return [Row]
1729
- def initialize(row, metadata, number, source_number)
2172
+ def initialize(row, metadata, number, source_number, options = {})
1730
2173
  @table = metadata
1731
2174
  @number = number
1732
2175
  @sourceNumber = source_number
@@ -1748,7 +2191,7 @@ module RDF::Tabular
1748
2191
  end
1749
2192
 
1750
2193
  # Make sure that the row length is at least as long as the number of column definitions, to implicitly include virtual columns
1751
- columns.each_with_index {|c, index| row[index] ||= (c.null || '')}
2194
+ columns.each_with_index {|c, index| row[index] ||= c.null}
1752
2195
 
1753
2196
  row.each_with_index do |value, index|
1754
2197
 
@@ -1764,7 +2207,7 @@ module RDF::Tabular
1764
2207
 
1765
2208
  @values << cell = Cell.new(metadata, column, self, value)
1766
2209
 
1767
- datatype = column.datatype || Datatype.new(base: "string", parent: column)
2210
+ datatype = column.datatype || Datatype.new({base: "string"}, parent: column)
1768
2211
  value = value.gsub(/\r\t\a/, ' ') unless %w(string json xml html anyAtomicType any).include?(datatype.base)
1769
2212
  value = value.strip.gsub(/\s+/, ' ') unless %w(string json xml html anyAtomicType any normalizedString).include?(datatype.base)
1770
2213
  # if the resulting string is an empty string, apply the remaining steps to the string given by the default property
@@ -1787,23 +2230,35 @@ module RDF::Tabular
1787
2230
  v.strip!
1788
2231
  end
1789
2232
 
1790
- expanded_dt = metadata.context.expand_iri(datatype.base, vocab: true)
2233
+ expanded_dt = datatype.id || metadata.context.expand_iri(datatype.base, vocab: true)
1791
2234
  if (lit_or_errors = value_matching_datatype(v.dup, datatype, expanded_dt, column.lang)).is_a?(RDF::Literal)
1792
2235
  lit_or_errors
1793
2236
  else
1794
2237
  cell_errors += lit_or_errors
1795
- RDF::Literal(v, language: column.lang)
2238
+ RDF::Literal(v, language: (column.lang unless column.lang == "und"))
1796
2239
  end
1797
2240
  end
1798
2241
  end.compact
1799
2242
 
2243
+ # Check for required values
2244
+ if column.required && (cell_values.any? {|v| v.to_s.empty?} || cell_values.empty?)
2245
+ cell_errors << "Required column has empty value(s): #{cell_values.map(&:to_s).inspect}"
2246
+ end
1800
2247
  cell.value = (column.separator ? cell_values : cell_values.first)
1801
2248
  cell.errors = cell_errors
1802
- metadata.send(:debug, "#{self.number}: each_cell ##{self.sourceNumber},#{cell.column.sourceNumber}", cell.errors.join("\n")) unless cell_errors.empty?
1803
2249
 
1804
2250
  map_values[columns[index - skipColumns].name] = (column.separator ? cell_values.map(&:to_s) : cell_values.first.to_s)
1805
2251
  end
1806
2252
 
2253
+ # Record primaryKey if validating
2254
+ @primaryKey = @values.
2255
+ select {|cell| Array(table.tableSchema.primaryKey).include?(cell.column.name)} if options[:validate]
2256
+
2257
+ # Record any row titles
2258
+ @titles = @values.
2259
+ select {|cell| Array(table.tableSchema.rowTitles).include?(cell.column.name)}.
2260
+ map(&:value)
2261
+
1807
2262
  # Map URLs for row
1808
2263
  @values.each_with_index do |cell, index|
1809
2264
  mapped_values = map_values.merge(
@@ -1824,30 +2279,24 @@ module RDF::Tabular
1824
2279
  {
1825
2280
  "@id" => id.to_s,
1826
2281
  "@type" => "Row",
1827
- "table" => (table.id.to_s if table.id),
2282
+ "table" => (table.id || table.url),
1828
2283
  "number" => self.number,
1829
2284
  "sourceNumber" => self.sourceNumber,
1830
- "cells" => @values.map(&:to_atd)
2285
+ "cells" => @values.map(&:value)
1831
2286
  }.delete_if {|k,v| v.nil?}
1832
2287
  end
1833
2288
 
2289
# Debugging representation: the class name followed by the inspected
# result of #to_atd. Interpolation is used so that an anonymous class
# (whose name is nil) does not make inspect itself raise.
def inspect
  "#{self.class.name}#{to_atd.inspect}"
end
2292
+
1834
2293
  private
1835
2294
  #
1836
2295
  # given a datatype specification, return a literal matching that specification, if found, otherwise an array of error messages
1837
2296
  # @return [RDF::Literal]
1838
2297
  def value_matching_datatype(value, datatype, expanded_dt, language)
1839
- value_errors = []
1840
-
1841
- # Check constraints
1842
- if datatype.length && value.length != datatype.length
1843
- value_errors << "#{value} does not have length #{datatype.length}"
1844
- end
1845
- if datatype.minLength && value.length < datatype.minLength
1846
- value_errors << "#{value} does not have length >= #{datatype.minLength}"
1847
- end
1848
- if datatype.maxLength && value.length > datatype.maxLength
1849
- value_errors << "#{value} does not have length <= #{datatype.maxLength}"
1850
- end
2298
+ lit, value_errors = nil, []
2299
+ original_value = value.dup
1851
2300
 
1852
2301
  format = datatype.format
1853
2302
  # Datatype specific constraints and conversions
@@ -1857,29 +2306,39 @@ module RDF::Tabular
1857
2306
  :unsignedLong, :unsignedInt, :unsignedShort, :unsignedByte,
1858
2307
  :nonPositiveInteger, :negativeInteger,
1859
2308
  :double, :float, :number
2309
+
1860
2310
  # Normalize representation based on numeric-specific facets
1861
- format ||= {}
1862
- groupChar = format[:groupChar] || ','
1863
- if format[:pattern] && !value.match(Regexp.new(format[:pattern]))
1864
- # pattern facet failed
1865
- value_errors << "#{value} does not match pattern #{format[:pattern]}"
2311
+
2312
+ format = case format
2313
+ when String then {"pattern" => format}
2314
+ when Hash then format
2315
+ else {}
1866
2316
  end
1867
- if value.include?(groupChar*2)
1868
- # pattern facet failed
1869
- value_errors << "#{value} has repeating #{groupChar.inspect}"
2317
+
2318
+ groupChar = format["groupChar"]
2319
+ decimalChar = format["decimalChar"] || '.'
2320
+ pattern = format["pattern"]
2321
+
2322
+ if !datatype.parse_uax35_number(pattern, value, groupChar || ",", decimalChar)
2323
+ value_errors << "#{value} does not match pattern #{pattern}"
1870
2324
  end
1871
- value.gsub!(groupChar, '')
1872
- value.sub!(format[:decimalChar], '.') if format[:decimalChar]
2325
+
2326
+ # pattern facet failed
2327
+ value_errors << "#{value} has repeating #{groupChar.inspect}" if groupChar && value.include?(groupChar*2)
2328
+ value = value.gsub(groupChar, '') if groupChar
2329
+ value = value.sub(decimalChar, '.')
1873
2330
 
1874
2331
  # Extract percent or per-mille sign
1875
2332
  percent = permille = false
1876
- case value
1877
- when /%$/
1878
- value = value[0..-2]
1879
- percent = true
1880
- when /‰$/
1881
- value = value[0..-2]
1882
- permille = true
2333
+ if groupChar
2334
+ case value
2335
+ when /%/
2336
+ value = value.sub('%', '')
2337
+ percent = true
2338
+ when /‰/
2339
+ value = value.sub('‰', '')
2340
+ permille = true
2341
+ end
1883
2342
  end
1884
2343
 
1885
2344
  lit = RDF::Literal(value, datatype: expanded_dt)
@@ -1889,129 +2348,117 @@ module RDF::Tabular
1889
2348
  o = o / 1000 if permille
1890
2349
  lit = RDF::Literal(o, datatype: expanded_dt)
1891
2350
  end
2351
+
2352
+ if !lit.plain? && datatype.minimum && lit < datatype.minimum
2353
+ value_errors << "#{value} < minimum #{datatype.minimum}"
2354
+ end
2355
+ case
2356
+ when datatype.minimum && lit < datatype.minimum
2357
+ value_errors << "#{value} < minimum #{datatype.minimum}"
2358
+ when datatype.maximum && lit > datatype.maximum
2359
+ value_errors << "#{value} > maximum #{datatype.maximum}"
2360
+ when datatype.minInclusive && lit < datatype.minInclusive
2361
+ value_errors << "#{value} < minInclusive #{datatype.minInclusive}"
2362
+ when datatype.maxInclusive && lit > datatype.maxInclusive
2363
+ value_errors << "#{value} > maxInclusive #{datatype.maxInclusive}"
2364
+ when datatype.minExclusive && lit <= datatype.minExclusive
2365
+ value_errors << "#{value} <= minExclusive #{datatype.minExclusive}"
2366
+ when datatype.maxExclusive && lit >= datatype.maxExclusive
2367
+ value_errors << "#{value} ?= maxExclusive #{datatype.maxExclusive}"
2368
+ end
1892
2369
  when :boolean
1893
- lit = if format
2370
+ if format
1894
2371
  # True/False determined by Y|N values
1895
2372
  t, f = format.to_s.split('|', 2)
1896
2373
  case
1897
2374
  when value == t
1898
- value = RDF::Literal::TRUE
2375
+ lit = RDF::Literal::TRUE
1899
2376
  when value == f
1900
- value = RDF::Literal::FALSE
2377
+ lit = RDF::Literal::FALSE
1901
2378
  else
1902
2379
  value_errors << "#{value} does not match boolean format #{format}"
1903
- RDF::Literal::Boolean.new(value)
1904
2380
  end
1905
2381
  else
1906
2382
  if %w(1 true).include?(value.downcase)
1907
- RDF::Literal::TRUE
2383
+ lit = RDF::Literal::TRUE
1908
2384
  elsif %w(0 false).include?(value.downcase)
1909
- RDF::Literal::FALSE
2385
+ lit = RDF::Literal::FALSE
2386
+ else
2387
+ value_errors << "#{value} does not match boolean"
1910
2388
  end
1911
2389
  end
1912
2390
  when :date, :time, :dateTime, :dateTimeStamp, :datetime
1913
- # Match values
1914
- tz, date_format, time_format = nil, nil, nil
1915
-
1916
- # Extract tz info
1917
- if format && (md = format.match(/^(.*[dyms])+(\s*[xX]{1,5})$/))
1918
- format, tz = md[1], md[2]
2391
+ if value = datatype.parse_uax35_date(format, value)
2392
+ lit = RDF::Literal(value, datatype: expanded_dt)
2393
+ else
2394
+ value_errors << "#{original_value} does not match format #{format}"
1919
2395
  end
1920
-
1921
- if format
1922
- date_format, time_format = format.split(' ')
1923
- if datatype.base.to_sym == :time
1924
- date_format, time_format = nil, date_format
1925
- end
1926
-
1927
- # Extract date, of specified
1928
- date_part = case date_format
1929
- when 'yyyy-MM-dd' then value.match(/^(?<yr>\d{4})-(?<mo>\d{2})-(?<da>\d{2})/)
1930
- when 'yyyyMMdd' then value.match(/^(?<yr>\d{4})(?<mo>\d{2})(?<da>\d{2})/)
1931
- when 'dd-MM-yyyy' then value.match(/^(?<da>\d{2})-(?<mo>\d{2})-(?<yr>\d{4})/)
1932
- when 'd-M-yyyy' then value.match(/^(?<da>\d{1,2})-(?<mo>\d{1,2})-(?<yr>\d{4})/)
1933
- when 'MM-dd-yyyy' then value.match(/^(?<mo>\d{2})-(?<da>\d{2})-(?<yr>\d{4})/)
1934
- when 'M-d-yyyy' then value.match(/^(?<mo>\d{1,2})-(?<da>\d{1,2})-(?<yr>\d{4})/)
1935
- when 'dd/MM/yyyy' then value.match(/^(?<da>\d{2})\/(?<mo>\d{2})\/(?<yr>\d{4})/)
1936
- when 'd/M/yyyy' then value.match(/^(?<da>\d{1,2})\/(?<mo>\d{1,2})\/(?<yr>\d{4})/)
1937
- when 'MM/dd/yyyy' then value.match(/^(?<mo>\d{2})\/(?<da>\d{2})\/(?<yr>\d{4})/)
1938
- when 'M/d/yyyy' then value.match(/^(?<mo>\d{1,2})\/(?<da>\d{1,2})\/(?<yr>\d{4})/)
1939
- when 'dd.MM.yyyy' then value.match(/^(?<da>\d{2})\.(?<mo>\d{2})\.(?<yr>\d{4})/)
1940
- when 'd.M.yyyy' then value.match(/^(?<da>\d{1,2})\.(?<mo>\d{1,2})\.(?<yr>\d{4})/)
1941
- when 'MM.dd.yyyy' then value.match(/^(?<mo>\d{2})\.(?<da>\d{2})\.(?<yr>\d{4})/)
1942
- when 'M.d.yyyy' then value.match(/^(?<mo>\d{1,2})\.(?<da>\d{1,2})\.(?<yr>\d{4})/)
1943
- when 'yyyy-MM-ddTHH:mm:ss' then value.match(/^(?<yr>\d{4})-(?<mo>\d{2})-(?<da>\d{2})T(?<hr>\d{2}):(?<mi>\d{2}):(?<se>\d{2})/)
1944
- when 'yyyy-MM-ddTHH:mm' then value.match(/^(?<yr>\d{4})-(?<mo>\d{2})-(?<da>\d{2})T(?<hr>\d{2}):(?<mi>\d{2})(?<se>)/)
1945
- else
1946
- value_errors << "unrecognized date/time format #{date_format}" if date_format
1947
- nil
2396
+ when :duration, :dayTimeDuration, :yearMonthDuration
2397
+ # SPEC CONFUSION: surely format also includes that for other duration types?
2398
+ re = Regexp.new(format) rescue nil
2399
+ if re.nil? ||value.match(re)
2400
+ lit = RDF::Literal(value, datatype: expanded_dt)
2401
+ else
2402
+ value_errors << "#{value} does not match format #{format}"
2403
+ end
2404
+ when :hexBinary, :base64Binary
2405
+ lit = RDF::Literal.new(value, datatype: expanded_dt)
2406
+ unless lit.valid?
2407
+ value_errors << "#{value} is invalid"
2408
+ lit = RDF::Literal.new(value)
2409
+ else
2410
+ if datatype.length && lit.object.length != datatype.length
2411
+ value_errors << "decoded #{value} does not have length #{datatype.length}"
1948
2412
  end
1949
-
1950
- # Forward past date part
1951
- if date_part
1952
- value = value[date_part.to_s.length..-1]
1953
- value = value.lstrip if date_part && value.start_with?(' ')
2413
+ if datatype.minLength && lit.object.length < datatype.minLength
2414
+ value_errors << "decoded #{value} does not have length >= #{datatype.length}"
1954
2415
  end
1955
-
1956
- # Extract time, of specified
1957
- time_part = case time_format
1958
- when 'HH:mm:ss' then value.match(/^(?<hr>\d{2}):(?<mi>\d{2}):(?<se>\d{2})/)
1959
- when 'HHmmss' then value.match(/^(?<hr>\d{2})(?<mi>\d{2})(?<se>\d{2})/)
1960
- when 'HH:mm' then value.match(/^(?<hr>\d{2}):(?<mi>\d{2})(?<se>)/)
1961
- when 'HHmm' then value.match(/^(?<hr>\d{2})(?<mi>\d{2})(?<se>)/)
1962
- else
1963
- value_errors << "unrecognized date/time format #{time_format}" if time_format
1964
- nil
2416
+ if datatype.maxLength && lit.object.length < datatype.maxLength
2417
+ value_errors << "decoded #{value} does not have length <= #{datatype.length}"
1965
2418
  end
1966
-
1967
- # Forward past time part
1968
- value = value[time_part.to_s.length..-1] if time_part
1969
-
1970
- # Use datetime match for time
1971
- time_part = date_part if date_part && date_part.names.include?("hr")
1972
-
1973
- # If there's a timezone, it may optionally start with whitespace
1974
- value = value.lstrip if tz.to_s.start_with?(' ')
1975
- tz_part = value if tz
1976
-
1977
- # Compose normalized value
1978
- vd = ("%04d-%02d-%02d" % [date_part[:yr].to_i, date_part[:mo].to_i, date_part[:da].to_i]) if date_part
1979
- vt = ("%02d:%02d:%02d" % [time_part[:hr].to_i, time_part[:mi].to_i, time_part[:se].to_i]) if time_part
1980
- value = [vd, vt].compact.join('T')
1981
- value += tz_part.to_s
1982
2419
  end
1983
-
1984
- lit = RDF::Literal(value, datatype: expanded_dt)
1985
- when :duration, :dayTimeDuration, :yearMonthDuration
1986
- # SPEC CONFUSION: surely format also includes that for other duration types?
1987
- lit = RDF::Literal(value, datatype: expanded_dt)
1988
2420
  when :anyType, :anySimpleType, :ENTITIES, :IDREFS, :NMTOKENS,
1989
2421
  :ENTITY, :ID, :IDREF, :NOTATION
1990
2422
  value_errors << "#{value} uses unsupported datatype: #{datatype.base}"
1991
2423
  else
1992
2424
  # For other types, format is a regexp
1993
- unless format.nil? || value.match(Regexp.new(format))
2425
+ re = Regexp.new(format) rescue nil
2426
+ unless re.nil? || value.match(re)
1994
2427
  value_errors << "#{value} does not match format #{format}"
1995
2428
  end
1996
2429
  lit = if value_errors.empty?
1997
2430
  if expanded_dt == RDF::XSD.string
1998
2431
  # Type string will still use language
1999
- RDF::Literal(value, language: language)
2432
+ RDF::Literal(value, language: (language unless language == "und"))
2000
2433
  else
2001
2434
  RDF::Literal(value, datatype: expanded_dt)
2002
2435
  end
2003
2436
  end
2004
2437
  end
2005
2438
 
2439
+ if datatype.length && value.to_s.length != datatype.length && ![:hexBinary, :base64Binary].include?(datatype.base.to_sym)
2440
+ value_errors << "#{value} does not have length #{datatype.length}"
2441
+ end
2442
+ if datatype.minLength && value.to_s.length < datatype.minLength && ![:hexBinary, :base64Binary].include?(datatype.base.to_sym)
2443
+ value_errors << "#{value} does not have length >= #{datatype.minLength}"
2444
+ end
2445
+ if datatype.maxLength && value.to_s.length > datatype.maxLength && ![:hexBinary, :base64Binary].include?(datatype.base.to_sym)
2446
+ value_errors << "#{value} does not have length <= #{datatype.maxLength}"
2447
+ end
2448
+
2449
+ # value constraints
2450
+ value_errors << "#{value} < minimum #{datatype.minimum}" if datatype.minimum && lit < datatype.minimum
2451
+ value_errors << "#{value} > maximum #{datatype.maximum}" if datatype.maximum && lit > datatype.maximum
2452
+ value_errors << "#{value} < minInclusive #{datatype.minInclusive}" if datatype.minInclusive && lit < datatype.minInclusive
2453
+ value_errors << "#{value} > maxInclusive #{datatype.maxInclusive}" if datatype.maxInclusive && lit > datatype.maxInclusive
2454
+ value_errors << "#{value} <= minExclusive #{datatype.minExclusive}" if datatype.minExclusive && lit <= datatype.minExclusive
2455
+ value_errors << "#{value} >= maxExclusive #{datatype.maxExclusive}" if datatype.maxExclusive && lit >= datatype.maxExclusive
2456
+
2006
2457
  # Final value is a valid literal, or a plain literal otherwise
2007
2458
  value_errors << "#{value} is not a valid #{datatype.base}" if lit && !lit.valid?
2008
2459
 
2009
- # FIXME Value constraints
2010
-
2460
+ # Either return matched literal value or errors
2011
2461
  value_errors.empty? ? lit : value_errors
2012
2462
  end
2013
2463
  end
2014
-
2015
- # Metadata errors detected
2016
- class Error < StandardError; end
2017
2464
  end