rdf-tabular 0.1.3.1 → 0.2.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +4 -3
- data/VERSION +1 -1
- data/etc/README +4 -0
- data/etc/csvw.jsonld +1 -1
- data/etc/earl.html +10544 -0
- data/etc/earl.jsonld +17068 -0
- data/etc/earl.ttl +7114 -0
- data/etc/template.haml +205 -0
- data/etc/well-known +4 -0
- data/lib/rdf/tabular.rb +10 -4
- data/lib/rdf/tabular/format.rb +11 -7
- data/lib/rdf/tabular/metadata.rb +761 -314
- data/lib/rdf/tabular/reader.rb +276 -144
- data/spec/format_spec.rb +11 -8
- data/spec/matchers.rb +4 -4
- data/spec/metadata_spec.rb +120 -36
- data/spec/reader_spec.rb +56 -18
- data/spec/spec_helper.rb +10 -2
- data/spec/suite_helper.rb +35 -18
- data/spec/suite_spec.rb +26 -24
- metadata +15 -3
data/etc/template.haml
ADDED
@@ -0,0 +1,205 @@
|
|
1
|
+
-# This template is used for generating a rollup EARL report. It expects to be
|
2
|
+
-# called with a single _tests_ local with the following structure
|
3
|
+
- require 'cgi'
|
4
|
+
- require 'digest'
|
5
|
+
|
6
|
+
!!! 5
|
7
|
+
%html{:prefix => "earl: http://www.w3.org/ns/earl# doap: http://usefulinc.com/ns/doap# mf: http://www.w3.org/2001/sw/DataAccess/tests/test-manifest#"}
|
8
|
+
- subjects = tests['testSubjects']
|
9
|
+
%head
|
10
|
+
%meta{"http-equiv" => "Content-Type", :content => "text/html;charset=utf-8"}
|
11
|
+
%meta{name: "viewport", content: "width=device-width, initial-scale=1.0"}
|
12
|
+
%link{rel: "stylesheet", type: "text/css", href: "https://www.w3.org/StyleSheets/TR/base"}
|
13
|
+
%title
|
14
|
+
= tests['name']
|
15
|
+
Implementation Report
|
16
|
+
:css
|
17
|
+
span[property='dc:description'] { display: none; }
|
18
|
+
td.PASS { color: green; }
|
19
|
+
td.FAIL { color: red; }
|
20
|
+
table.report {
|
21
|
+
border-width: 1px;
|
22
|
+
border-spacing: 2px;
|
23
|
+
border-style: outset;
|
24
|
+
border-color: gray;
|
25
|
+
border-collapse: separate;
|
26
|
+
background-color: white;
|
27
|
+
}
|
28
|
+
table.report th {
|
29
|
+
border-width: 1px;
|
30
|
+
padding: 1px;
|
31
|
+
border-style: inset;
|
32
|
+
border-color: gray;
|
33
|
+
background-color: white;
|
34
|
+
-moz-border-radius: ;
|
35
|
+
}
|
36
|
+
table.report td {
|
37
|
+
border-width: 1px;
|
38
|
+
padding: 1px;
|
39
|
+
border-style: inset;
|
40
|
+
border-color: gray;
|
41
|
+
background-color: white;
|
42
|
+
-moz-border-radius: ;
|
43
|
+
}
|
44
|
+
tr.summary {font-weight: bold;}
|
45
|
+
td.passed-all {color: green;}
|
46
|
+
td.passed-most {color: darkorange;}
|
47
|
+
td.passed-some {color: red;}
|
48
|
+
td.passed-none {color: gray;}
|
49
|
+
em.rfc2119 {
|
50
|
+
text-transform: lowercase;
|
51
|
+
font-variant: small-caps;
|
52
|
+
font-style: normal;
|
53
|
+
color: #900;
|
54
|
+
}
|
55
|
+
a.testlink {
|
56
|
+
color: inherit;
|
57
|
+
text-decoration: none;
|
58
|
+
}
|
59
|
+
a.testlink:hover {
|
60
|
+
text-decoration: underline;
|
61
|
+
}
|
62
|
+
%body
|
63
|
+
- subject_refs = {}
|
64
|
+
- tests['entries'].each {|m| m['title'] ||= m['description']}
|
65
|
+
%section{:about => tests['@id'], typeof: Array(tests['@type']).join(" ")}
|
66
|
+
%h2
|
67
|
+
Ruby rdf-tabular gem test results
|
68
|
+
%p
|
69
|
+
This document reports conformance for for the following specifications:
|
70
|
+
%ul
|
71
|
+
%li
|
72
|
+
%a{property: "doap:name", href: "http://www.w3.org/TR/tabular-data-model/"}="MetaModel for Tabular Data and Metadata on the Web"
|
73
|
+
%li
|
74
|
+
%a{property: "doap:name", href: "http://www.w3.org/TR/tabular-metadata/"}="Metadata Vocabulary for Tabular Data"
|
75
|
+
%li
|
76
|
+
%a{property: "doap:name", href: "http://www.w3.org/TR/csv2rdf/"}="Generating RDF from Tabular Data on the Web"
|
77
|
+
%li
|
78
|
+
%a{property: "doap:name", href: "http://www.w3.org/TR/csv2json/"}="Generating JSON from Tabular Data on the Web"
|
79
|
+
%p
|
80
|
+
This report is also available in
|
81
|
+
%a{:href => "earl.ttl"}
|
82
|
+
Turtle
|
83
|
+
%dl
|
84
|
+
- subjects.each_with_index do |subject, index|
|
85
|
+
- subject_refs[subject['@id']] = "subj_#{index}"
|
86
|
+
%dt{:id => subject_refs[subject['@id']]}
|
87
|
+
%a{:href => subject['@id']}
|
88
|
+
%span{:about => subject['@id'], property: "doap:name"}<= subject['name']
|
89
|
+
%dd{property: "earl:testSubjects", resource: subject['@id'], typeof: Array(subject['@type']).join(" "), :inlist => true}
|
90
|
+
%dl
|
91
|
+
- if subject['doapDesc']
|
92
|
+
%dt= "Description"
|
93
|
+
%dd{property: "doap:description", :lang => 'en'}<
|
94
|
+
~ CGI.escapeHTML subject['doapDesc']
|
95
|
+
- if subject['language']
|
96
|
+
%dt= "Programming Language"
|
97
|
+
%dd{property: "doap:programming-language"}<
|
98
|
+
~ CGI.escapeHTML subject['language']
|
99
|
+
- if subject['homepage']
|
100
|
+
%dt= "Home Page"
|
101
|
+
%dd{property: "doap:homepage"}
|
102
|
+
%a{href: subject['homepage']}
|
103
|
+
~ CGI.escapeHTML subject['homepage']
|
104
|
+
- if subject['developer']
|
105
|
+
%dt= "Developer"
|
106
|
+
%dd{:rel => "doap:developer"}
|
107
|
+
- subject['developer'].each do |dev|
|
108
|
+
%div{resource: dev['@id'], typeof: Array(dev['@type']).join(" ")}
|
109
|
+
- if dev.has_key?('@id')
|
110
|
+
%a{:href => dev['@id']}
|
111
|
+
%span{property: "foaf:name"}<
|
112
|
+
~ CGI.escapeHTML dev['foaf:name']
|
113
|
+
- else
|
114
|
+
%span{property: "foaf:name"}<
|
115
|
+
~ CGI.escapeHTML dev['foaf:name']
|
116
|
+
- if dev['foaf:homepage']
|
117
|
+
%a{property: "foaf:homepage", href: dev['foaf:homepage']}
|
118
|
+
~ CGI.escapeHTML dev['foaf:homepage']
|
119
|
+
%dt
|
120
|
+
Test Suite Compliance
|
121
|
+
%dd
|
122
|
+
%table.report
|
123
|
+
%tbody
|
124
|
+
- tests['entries'].sort_by {|m| m['title'].to_s.downcase}.each do |manifest|
|
125
|
+
- passed = manifest['entries'].select {|t| t['assertions'][index]['result']['outcome'] == 'earl:passed' }.length
|
126
|
+
- total = manifest['entries'].length
|
127
|
+
- pct = (passed * 100.0) / total
|
128
|
+
- cls = (pct == 100.0 ? 'passed-all' : (pct >= 85.0) ? 'passed-most' : (pct == 0.0 ? 'passed-none' : 'passed-some'))
|
129
|
+
%tr
|
130
|
+
%td
|
131
|
+
%a{href: "##{manifest['title']}"}
|
132
|
+
~ manifest['title']
|
133
|
+
%td{:class => cls}
|
134
|
+
= pct == 0.0 ? "Untested" : "#{passed}/#{total} (#{'%.1f' % pct}%)"
|
135
|
+
%section
|
136
|
+
%h2
|
137
|
+
Individual Test Results
|
138
|
+
- tests['entries'].sort_by {|m| m['title'].to_s.downcase}.each do |manifest|
|
139
|
+
- test_cases = manifest['entries']
|
140
|
+
%section{id: manifest['title'], typeof: manifest['@type'].join(" "), resource: manifest['@id']}
|
141
|
+
%h2{property: "dc:title mf:name"}<=manifest['title']
|
142
|
+
- Array(manifest['description']).each do |desc|
|
143
|
+
%p{property: "rdfs:comment"}<
|
144
|
+
~ CGI.escapeHTML desc
|
145
|
+
%table.report
|
146
|
+
- skip_subject = {}
|
147
|
+
- passed_tests = []
|
148
|
+
%tr
|
149
|
+
%th
|
150
|
+
Test
|
151
|
+
- subjects.each_with_index do |subject, index|
|
152
|
+
- subject_refs[subject['@id']] = "subj_#{index}"
|
153
|
+
-# If subject is untested for every test in this manifest, skip it
|
154
|
+
- skip_subject[subject['@id']] = manifest['entries'].all? {|t| t['assertions'][index]['result']['outcome'] == 'earl:untested'}
|
155
|
+
- unless skip_subject[subject['@id']]
|
156
|
+
%th
|
157
|
+
%a{:href => '#' + subject_refs[subject['@id']]}<=subject['name']
|
158
|
+
- test_cases.each do |test|
|
159
|
+
%tr{:rel => "mf:entries", typeof: test['@type'].join(" "), resource: test['@id'], :inlist => true}
|
160
|
+
%td
|
161
|
+
= "Test #{test['@id'].split("#").last}: #{CGI.escapeHTML test['title']}"
|
162
|
+
- test['assertions'].each_with_index do |assertion, ndx|
|
163
|
+
- next if skip_subject[assertion['subject']]
|
164
|
+
- pass_fail = assertion['result']['outcome'].split(':').last.upcase.sub(/(PASS|FAIL)ED$/, '\1')
|
165
|
+
- passed_tests[ndx] = (passed_tests[ndx] || 0) + (pass_fail == 'PASS' ? 1 : 0)
|
166
|
+
%td{:class => pass_fail, property: "earl:assertions", typeof: assertion['@type'], :inlist => true}
|
167
|
+
- if assertion['assertedBy']
|
168
|
+
%link{property: "earl:assertedBy", :href => assertion['assertedBy']}
|
169
|
+
%link{property: "earl:test", :href => assertion['test']}
|
170
|
+
%link{property: "earl:subject", :href => assertion['subject']}
|
171
|
+
- if assertion['mode']
|
172
|
+
%link{property: 'earl:mode', :href => assertion['mode']}
|
173
|
+
%span{property: "earl:result", typeof: assertion['result']['@type']}
|
174
|
+
%span{property: 'earl:outcome', resource: assertion['result']['outcome']}
|
175
|
+
= pass_fail
|
176
|
+
%tr.summary
|
177
|
+
%td
|
178
|
+
= "Percentage passed out of #{manifest['entries'].length} Tests"
|
179
|
+
- passed_tests.compact.each do |r|
|
180
|
+
- pct = (r * 100.0) / manifest['entries'].length
|
181
|
+
%td{:class => (pct == 100.0 ? 'passed-all' : (pct >= 95.0 ? 'passed-most' : 'passed-some'))}
|
182
|
+
= "#{'%.1f' % pct}%"
|
183
|
+
%section#appendix{property: "earl:generatedBy", resource: tests['generatedBy']['@id'], typeof: tests['generatedBy']['@type']}
|
184
|
+
%h2
|
185
|
+
Report Generation Software
|
186
|
+
- doap = tests['generatedBy']
|
187
|
+
- rel = doap['release']
|
188
|
+
%p
|
189
|
+
This report generated by
|
190
|
+
%span{property: "doap:name"}<
|
191
|
+
%a{:href => tests['generatedBy']['@id']}<
|
192
|
+
= doap['name']
|
193
|
+
%meta{property: "doap:shortdesc", :content => doap['shortdesc'], :lang => 'en'}
|
194
|
+
%meta{property: "doap:description", :content => doap['doapDesc'], :lang => 'en'}
|
195
|
+
version
|
196
|
+
%span{property: "doap:release", resource: rel['@id'], typeof: 'doap:Version'}
|
197
|
+
%span{property: "doap:revision"}<=rel['revision']
|
198
|
+
%meta{property: "doap:name", :content => rel['name']}
|
199
|
+
%meta{property: "doap:created", :content => rel['created'], :datatype => "xsd:date"}
|
200
|
+
an
|
201
|
+
%a{property: "doap:license", :href => doap['license']}<="Unlicensed"
|
202
|
+
%span{property: "doap:programming-language"}<="Ruby"
|
203
|
+
application. More information is available at
|
204
|
+
%a{property: "doap:homepage", :href => doap['homepage']}<=doap['homepage']
|
205
|
+
= "."
|
data/etc/well-known
ADDED
data/lib/rdf/tabular.rb
CHANGED
@@ -1,9 +1,5 @@
|
|
1
1
|
$:.unshift(File.expand_path("..", __FILE__))
|
2
2
|
require 'rdf' # @see http://rubygems.org/gems/rdf
|
3
|
-
begin
|
4
|
-
require 'byebug' # REMOVE ME
|
5
|
-
rescue LoadError
|
6
|
-
end
|
7
3
|
require 'csv'
|
8
4
|
|
9
5
|
module RDF
|
@@ -28,6 +24,16 @@ module RDF
|
|
28
24
|
autoload :Transformation, 'rdf/tabular/metadata'
|
29
25
|
autoload :VERSION, 'rdf/tabular/version'
|
30
26
|
|
27
|
+
# Metadata errors detected
|
28
|
+
class Error < RDF::ReaderError; end
|
29
|
+
|
30
|
+
# Relative location of site-wide configuration file
|
31
|
+
SITE_WIDE_CONFIG = "/.well-known/csvm".freeze
|
32
|
+
SITE_WIDE_DEFAULT = %(
|
33
|
+
{+url}-metadata.json
|
34
|
+
csv-metadata.json
|
35
|
+
).gsub(/^\s+/, '').freeze
|
36
|
+
|
31
37
|
def self.debug?; @debug; end
|
32
38
|
def self.debug=(value); @debug = value; end
|
33
39
|
end
|
data/lib/rdf/tabular/format.rb
CHANGED
@@ -8,12 +8,13 @@ module RDF::Tabular
|
|
8
8
|
# RDF::Format.for(:tsv) #=> RDF::Tabular::Format
|
9
9
|
# RDF::Format.for("etc/foaf.csv")
|
10
10
|
# RDF::Format.for("etc/foaf.tsv")
|
11
|
-
# RDF::Format.for(:
|
12
|
-
# RDF::Format.for(:
|
13
|
-
# RDF::Format.for(:
|
14
|
-
# RDF::Format.for(:
|
15
|
-
# RDF::Format.for(:
|
16
|
-
# RDF::Format.for(:
|
11
|
+
# RDF::Format.for(file_name: "etc/foaf.csv")
|
12
|
+
# RDF::Format.for(file_name: "etc/foaf.tsv")
|
13
|
+
# RDF::Format.for(file_extension: "csv")
|
14
|
+
# RDF::Format.for(file_extension: "tsv")
|
15
|
+
# RDF::Format.for(content_type: "text/csv")
|
16
|
+
# RDF::Format.for(content_type: "text/tab-separated-values")
|
17
|
+
# RDF::Format.for(content_type: "application/csvm+json")
|
17
18
|
#
|
18
19
|
# @example Obtaining serialization format MIME types
|
19
20
|
# RDF::Format.content_types #=> {"text/csv" => [RDF::Tabular::Format]}
|
@@ -25,7 +26,10 @@ module RDF::Tabular
|
|
25
26
|
class Format < RDF::Format
|
26
27
|
content_type 'text/csv',
|
27
28
|
extensions: [:csv, :tsv],
|
28
|
-
alias:
|
29
|
+
alias: %w{
|
30
|
+
text/tab-separated-values
|
31
|
+
application/csvm+json
|
32
|
+
}
|
29
33
|
content_encoding 'utf-8'
|
30
34
|
|
31
35
|
reader { RDF::Tabular::Reader }
|
data/lib/rdf/tabular/metadata.rb
CHANGED
@@ -11,8 +11,7 @@ require 'yaml' # used by BCP47, which should have required it.
|
|
11
11
|
# CSVM Metadata processor
|
12
12
|
#
|
13
13
|
# * Extracts Metadata from file or Hash definition
|
14
|
-
# *
|
15
|
-
# * Extract Metadata from a CSV file
|
14
|
+
# * Extract Embedded Metadata from a CSV file
|
16
15
|
# * Return table-level annotations
|
17
16
|
# * Return Column-level annotations
|
18
17
|
# * Return row iterator with column information
|
@@ -45,30 +44,28 @@ module RDF::Tabular
|
|
45
44
|
valueUrl: :uri_template,
|
46
45
|
}.freeze
|
47
46
|
INHERITED_DEFAULTS = {
|
48
|
-
aboutUrl: "".freeze,
|
49
47
|
default: "".freeze,
|
50
48
|
lang: "und",
|
51
49
|
null: "".freeze,
|
52
50
|
ordered: false,
|
53
|
-
propertyUrl: "".freeze,
|
54
51
|
required: false,
|
55
52
|
textDirection: "ltr".freeze,
|
56
|
-
valueUrl: "".freeze,
|
57
53
|
}.freeze
|
58
54
|
|
59
55
|
# Valid datatypes
|
60
56
|
DATATYPES = {
|
61
|
-
anyAtomicType: RDF::XSD.
|
57
|
+
anyAtomicType: RDF::XSD.anyAtomicType,
|
62
58
|
anyURI: RDF::XSD.anyURI,
|
63
59
|
base64Binary: RDF::XSD.basee65Binary,
|
64
60
|
boolean: RDF::XSD.boolean,
|
65
61
|
byte: RDF::XSD.byte,
|
66
62
|
date: RDF::XSD.date,
|
67
63
|
dateTime: RDF::XSD.dateTime,
|
68
|
-
|
64
|
+
dayTimeDuration: RDF::XSD.dayTimeDuration,
|
69
65
|
dateTimeStamp: RDF::XSD.dateTimeStamp,
|
70
66
|
decimal: RDF::XSD.decimal,
|
71
67
|
double: RDF::XSD.double,
|
68
|
+
duration: RDF::XSD.duration,
|
72
69
|
float: RDF::XSD.float,
|
73
70
|
ENTITY: RDF::XSD.ENTITY,
|
74
71
|
gDay: RDF::XSD.gDay,
|
@@ -84,6 +81,7 @@ module RDF::Tabular
|
|
84
81
|
Name: RDF::XSD.Name,
|
85
82
|
NCName: RDF::XSD.NCName,
|
86
83
|
negativeInteger: RDF::XSD.negativeInteger,
|
84
|
+
NMTOKEN: RDF::XSD.NMTOKEN,
|
87
85
|
nonNegativeInteger: RDF::XSD.nonNegativeInteger,
|
88
86
|
nonPositiveInteger: RDF::XSD.nonPositiveInteger,
|
89
87
|
normalizedString: RDF::XSD.normalizedString,
|
@@ -100,7 +98,7 @@ module RDF::Tabular
|
|
100
98
|
unsignedShort: RDF::XSD.unsignedShort,
|
101
99
|
yearMonthDuration: RDF::XSD.yearMonthDuration,
|
102
100
|
|
103
|
-
any: RDF::XSD.
|
101
|
+
any: RDF::XSD.anyAtomicType,
|
104
102
|
binary: RDF::XSD.base64Binary,
|
105
103
|
datetime: RDF::XSD.dateTime,
|
106
104
|
html: RDF.HTML,
|
@@ -115,7 +113,7 @@ module RDF::Tabular
|
|
115
113
|
|
116
114
|
# Local version of the context
|
117
115
|
# @return [JSON::LD::Context]
|
118
|
-
LOCAL_CONTEXT = ::JSON::LD::Context.new.parse(File.expand_path("../../../../etc/csvw.jsonld", __FILE__))
|
116
|
+
LOCAL_CONTEXT = ::JSON::LD::Context.new.parse(File.expand_path("../../../../etc/csvw.jsonld", __FILE__)).freeze
|
119
117
|
|
120
118
|
# ID of this Metadata
|
121
119
|
# @return [RDF::URI]
|
@@ -139,7 +137,9 @@ module RDF::Tabular
|
|
139
137
|
#
|
140
138
|
# @param [String] path
|
141
139
|
# @param [Hash{Symbol => Object}] options
|
142
|
-
# see `RDF::Util::File.open_file` in RDF.rb
|
140
|
+
# see `RDF::Util::File.open_file` in RDF.rb and {#new}
|
141
|
+
# @yield [Metadata]
|
142
|
+
# @raise [IOError] if file not found
|
143
143
|
def self.open(path, options = {})
|
144
144
|
options = options.merge(
|
145
145
|
headers: {
|
@@ -152,8 +152,25 @@ module RDF::Tabular
|
|
152
152
|
end
|
153
153
|
end
|
154
154
|
|
155
|
+
# Return the well-known configuration for a file, and remember it using a weak-reference cache to avoid unnecessary retrievals.
|
156
|
+
# @param [String] base the URL used for finding the file
|
157
|
+
# @return [Array<String>, false]
|
158
|
+
def self.site_wide_config(base)
|
159
|
+
require 'rdf/util/cache' unless defined?(::RDF::Util::Cache)
|
160
|
+
@cache ||= RDF::Util::Cache.new(-1)
|
161
|
+
|
162
|
+
config_loc = RDF::URI(base).join(SITE_WIDE_CONFIG).to_s
|
163
|
+
# Only load if we haven't tried before. Use `SITE_WIDE_DEFAULT` if not found
|
164
|
+
if @cache[config_loc].nil?
|
165
|
+
@cache[config_loc] = RDF::Util::File.open_file(config_loc) do |rd|
|
166
|
+
rd.each_line.to_a
|
167
|
+
end rescue SITE_WIDE_DEFAULT.split
|
168
|
+
end
|
169
|
+
@cache[config_loc]
|
170
|
+
end
|
171
|
+
|
155
172
|
##
|
156
|
-
# Return metadata for a file, based on user-specified and
|
173
|
+
# Return metadata for a file, based on user-specified, linked, and site-wide location configuration from an input file
|
157
174
|
# @param [IO, StringIO] input
|
158
175
|
# @param [Hash{Symbol => Object}] options
|
159
176
|
# @option options [Metadata, Hash, String, RDF::URI] :metadata user supplied metadata, merged on top of extracted metadata. If provided as a URL, Metadata is loaded from that location
|
@@ -175,22 +192,46 @@ module RDF::Tabular
|
|
175
192
|
# Search for metadata until found
|
176
193
|
|
177
194
|
# load link metadata, if available
|
178
|
-
|
179
|
-
if input.respond_to?(:links) &&
|
195
|
+
all_locs = []
|
196
|
+
if !metadata && input.respond_to?(:links) &&
|
180
197
|
link = input.links.find_link(%w(rel describedby))
|
181
|
-
|
198
|
+
link_loc = RDF::URI(base).join(link.href).to_s
|
199
|
+
md = Metadata.open(link_loc, options.merge(filenames: link_loc, reason: "load linked metadata: #{link_loc}"))
|
200
|
+
all_locs << link_loc if md
|
201
|
+
# Metadata must describe file to be useful
|
202
|
+
metadata = md if md && md.describes_file?(base)
|
182
203
|
end
|
183
204
|
|
184
|
-
|
185
|
-
|
205
|
+
locs = []
|
206
|
+
# If we still don't have metadata, load the site-wide configuration file and use templates found there as locations
|
207
|
+
if !metadata && base
|
208
|
+
templates = site_wide_config(base)
|
209
|
+
debug("for_input", options) {"templates: #{templates.map(&:to_s).inspect}"}
|
210
|
+
locs = templates.map do |template|
|
211
|
+
t = Addressable::Template.new(template)
|
212
|
+
RDF::URI(base).join(t.expand(url: base).to_s)
|
213
|
+
end
|
214
|
+
debug("for_input", options) {"locs: #{locs.map(&:to_s).inspect}"}
|
215
|
+
|
216
|
+
locs.each do |loc|
|
217
|
+
metadata ||= begin
|
218
|
+
md = Metadata.open(loc, options.merge(filenames: loc, reason: "load found metadata: #{loc}"))
|
219
|
+
# Metadata must describe file to be useful
|
220
|
+
all_locs << loc if md
|
221
|
+
md if md && md.describes_file?(base)
|
222
|
+
rescue IOError
|
223
|
+
debug("for_input", options) {"failed to load found metadata #{loc}: #{$!}"}
|
224
|
+
nil
|
225
|
+
end
|
226
|
+
end
|
186
227
|
end
|
187
228
|
|
188
|
-
|
189
|
-
|
190
|
-
|
191
|
-
|
192
|
-
|
193
|
-
|
229
|
+
# If Metadata was found, but no metadata describes the file, issue a warning
|
230
|
+
if !all_locs.empty? && !metadata
|
231
|
+
warnings = options.fetch(:warnings, [])
|
232
|
+
warnings << "Found metadata at #{all_locs.join(",")}, which does not describe #{base}, ignoring"
|
233
|
+
if options[:validate] && !options[:warnings]
|
234
|
+
$stderr.puts "Warnings: #{warnings.join("\n")}"
|
194
235
|
end
|
195
236
|
end
|
196
237
|
|
@@ -198,7 +239,7 @@ module RDF::Tabular
|
|
198
239
|
metadata = case
|
199
240
|
when metadata then metadata
|
200
241
|
when base then TableGroup.new({"@context" => "http://www.w3.org/ns/csvw", tables: [{url: base}]}, options)
|
201
|
-
else TableGroup.new({"@context" => "http://www.w3.org/ns/csvw", tables: []}, options)
|
242
|
+
else TableGroup.new({"@context" => "http://www.w3.org/ns/csvw", tables: [{url: nil}]}, options)
|
202
243
|
end
|
203
244
|
|
204
245
|
# Make TableGroup, if not already
|
@@ -217,6 +258,8 @@ module RDF::Tabular
|
|
217
258
|
else ::JSON.parse(input.to_s)
|
218
259
|
end
|
219
260
|
|
261
|
+
raise ::JSON::ParserError unless object.is_a?(Hash)
|
262
|
+
|
220
263
|
unless options[:parent]
|
221
264
|
# Add context, if not set (which it should be)
|
222
265
|
object['@context'] ||= options.delete(:@context) || options[:context]
|
@@ -237,7 +280,7 @@ module RDF::Tabular
|
|
237
280
|
when %w(tables).any? {|k| object_keys.include?(k)} then :TableGroup
|
238
281
|
when %w(dialect tableSchema transformations).any? {|k| object_keys.include?(k)} then :Table
|
239
282
|
when %w(targetFormat scriptFormat source).any? {|k| object_keys.include?(k)} then :Transformation
|
240
|
-
when %w(columns primaryKey foreignKeys).any? {|k| object_keys.include?(k)} then :Schema
|
283
|
+
when %w(columns primaryKey foreignKeys rowTitles).any? {|k| object_keys.include?(k)} then :Schema
|
241
284
|
when %w(name virtual).any? {|k| object_keys.include?(k)} then :Column
|
242
285
|
when %w(commentPrefix delimiter doubleQuote encoding header headerRowCount).any? {|k| object_keys.include?(k)} then :Dialect
|
243
286
|
when %w(lineTerminators quoteChar skipBlankRows skipColumns skipInitialSpace skipRows trim).any? {|k| object_keys.include?(k)} then :Dialect
|
@@ -251,13 +294,15 @@ module RDF::Tabular
|
|
251
294
|
when :Column then RDF::Tabular::Column
|
252
295
|
when :Dialect then RDF::Tabular::Dialect
|
253
296
|
else
|
254
|
-
raise Error, "
|
297
|
+
raise Error, "Unknown metadata type: #{type.inspect}"
|
255
298
|
end
|
256
299
|
end
|
257
300
|
|
258
301
|
md = klass.allocate
|
259
302
|
md.send(:initialize, object, options)
|
260
303
|
md
|
304
|
+
rescue ::JSON::ParserError
|
305
|
+
raise Error, "Expected input to be a JSON Object"
|
261
306
|
end
|
262
307
|
|
263
308
|
##
|
@@ -271,6 +316,8 @@ module RDF::Tabular
|
|
271
316
|
# Context used for this metadata. Taken from input if not provided
|
272
317
|
# @option options [RDF::URI] :base
|
273
318
|
# The Base URL to use when expanding the document. This overrides the value of `input` if it is a URL. If not specified and `input` is not an URL, the base URL defaults to the current document URL if in a browser context, or the empty string if there is no document context.
|
319
|
+
# @option options [Boolean] :normalize normalize the object
|
320
|
+
# @option options [Boolean] :validate Strict metadata validation
|
274
321
|
# @raise [Error]
|
275
322
|
# @return [Metadata]
|
276
323
|
def initialize(input, options = {})
|
@@ -285,15 +332,15 @@ module RDF::Tabular
|
|
285
332
|
@context = case input['@context']
|
286
333
|
when Array
|
287
334
|
warn "Context missing required value 'http://www.w3.org/ns/csvw'" unless input['@context'].include?('http://www.w3.org/ns/csvw')
|
288
|
-
LOCAL_CONTEXT.parse(input['@context'].detect {|e| e.is_a?(Hash)} || {})
|
335
|
+
LOCAL_CONTEXT.dup.parse(input['@context'].detect {|e| e.is_a?(Hash)} || {})
|
289
336
|
when Hash
|
290
337
|
warn "Context missing required value 'http://www.w3.org/ns/csvw'" unless input['@context'].include?('http://www.w3.org/ns/csvw')
|
291
|
-
LOCAL_CONTEXT.parse(input['@context'])
|
292
|
-
when "http://www.w3.org/ns/csvw" then LOCAL_CONTEXT
|
338
|
+
LOCAL_CONTEXT.dup.parse(input['@context'])
|
339
|
+
when "http://www.w3.org/ns/csvw" then LOCAL_CONTEXT.dup
|
293
340
|
else
|
294
341
|
if self.is_a?(TableGroup) || self.is_a?(Table) && !@parent
|
295
342
|
warn "Context missing required value 'http://www.w3.org/ns/csvw'"
|
296
|
-
LOCAL_CONTEXT
|
343
|
+
LOCAL_CONTEXT.dup
|
297
344
|
end
|
298
345
|
end
|
299
346
|
|
@@ -326,17 +373,17 @@ module RDF::Tabular
|
|
326
373
|
when :url
|
327
374
|
# URL of CSV relative to metadata
|
328
375
|
object[:url] = value
|
329
|
-
@url = base.join(value)
|
330
|
-
@
|
376
|
+
@url = @options[:base].join(value)
|
377
|
+
@options[:base] = @url if @context # Use as base for expanding IRIs
|
331
378
|
when :@id
|
332
379
|
# metadata identifier
|
333
380
|
object[:@id] = if value.is_a?(String)
|
334
381
|
value
|
335
382
|
else
|
336
383
|
warn "#{type} has invalid property '@id' (#{value.inspect}): expected a string"
|
337
|
-
""
|
384
|
+
"" # Default value
|
338
385
|
end
|
339
|
-
@id = base.join(object[:@id])
|
386
|
+
@id = @options[:base].join(object[:@id])
|
340
387
|
else
|
341
388
|
if @properties.has_key?(key) || INHERITED_PROPERTIES.has_key?(key)
|
342
389
|
self.send("#{key}=".to_sym, value)
|
@@ -348,7 +395,15 @@ module RDF::Tabular
|
|
348
395
|
end
|
349
396
|
|
350
397
|
# Set type from @type, if present and not otherwise defined
|
351
|
-
@type
|
398
|
+
@type = object[:@type].to_sym if object[:@type]
|
399
|
+
|
400
|
+
if options[:normalize]
|
401
|
+
# If normalizing, also remove remaining @context
|
402
|
+
self.normalize!
|
403
|
+
@context = nil
|
404
|
+
object.delete(:@context)
|
405
|
+
end
|
406
|
+
|
352
407
|
if reason
|
353
408
|
debug("md#initialize") {reason}
|
354
409
|
debug("md#initialize") {"filenames: #{filenames}"}
|
@@ -356,8 +411,14 @@ module RDF::Tabular
|
|
356
411
|
end
|
357
412
|
end
|
358
413
|
|
359
|
-
# Setters
|
414
|
+
# Getters and Setters
|
360
415
|
INHERITED_PROPERTIES.keys.each do |key|
|
416
|
+
define_method(key) do
|
417
|
+
object.fetch(key) do
|
418
|
+
parent ? parent.send(key) : default_value(key)
|
419
|
+
end
|
420
|
+
end
|
421
|
+
|
361
422
|
define_method("#{key}=".to_sym) do |value|
|
362
423
|
invalid = case key
|
363
424
|
when :aboutUrl, :default, :propertyUrl, :valueUrl
|
@@ -370,7 +431,7 @@ module RDF::Tabular
|
|
370
431
|
when :ordered, :required
|
371
432
|
"boolean" unless value.is_a?(TrueClass) || value.is_a?(FalseClass)
|
372
433
|
when :separator
|
373
|
-
"
|
434
|
+
"string or null" unless value.nil? || value.is_a?(String)
|
374
435
|
when :textDirection
|
375
436
|
"rtl or ltr" unless %(rtl ltr).include?(value)
|
376
437
|
when :datatype
|
@@ -379,7 +440,7 @@ module RDF::Tabular
|
|
379
440
|
|
380
441
|
if invalid
|
381
442
|
warn "#{type} has invalid property '#{key}' (#{value.inspect}): expected #{invalid}"
|
382
|
-
object
|
443
|
+
object.delete(key)
|
383
444
|
else
|
384
445
|
object[key] = value
|
385
446
|
end
|
@@ -399,18 +460,19 @@ module RDF::Tabular
|
|
399
460
|
# An object property that provides a schema description as described in section 3.8 Schemas, for all the tables in the group. This may be provided as an embedded object within the JSON metadata or as a URL reference to a separate JSON schema document
|
400
461
|
# when loading a remote schema, assign @id from its location if not already set
|
401
462
|
def tableSchema=(value)
|
402
|
-
case value
|
463
|
+
object[:tableSchema] = case value
|
403
464
|
when String
|
404
|
-
link = base.join(value).to_s
|
405
|
-
|
406
|
-
|
407
|
-
|
465
|
+
link = context.base.join(value).to_s
|
466
|
+
md = Schema.open(link, @options.merge(parent: self, context: nil, normalize: true))
|
467
|
+
md[:@id] ||= link
|
468
|
+
md
|
408
469
|
when Hash
|
409
|
-
|
470
|
+
Schema.new(value, @options.merge(parent: self, context: nil))
|
410
471
|
when Schema
|
411
|
-
|
472
|
+
value
|
412
473
|
else
|
413
474
|
warn "#{type} has invalid property 'tableSchema' (#{value.inspect}): expected a URL or object"
|
475
|
+
Schema.new({}, @options.merge(parent: self, context: nil))
|
414
476
|
end
|
415
477
|
end
|
416
478
|
|
@@ -445,13 +507,16 @@ module RDF::Tabular
|
|
445
507
|
end
|
446
508
|
|
447
509
|
# If provided, dialect provides hints to processors about how to parse the referenced file to create a tabular data model.
|
448
|
-
@dialect = case value
|
510
|
+
@dialect = object[:dialect] = case value
|
449
511
|
when String
|
450
|
-
|
512
|
+
link = context.base.join(value).to_s
|
513
|
+
md = Metadata.open(link, @options.merge(parent: self, context: nil, normalize: true))
|
514
|
+
md[:@id] ||= link
|
515
|
+
md
|
451
516
|
when Hash
|
452
|
-
|
517
|
+
Dialect.new(value, @options.merge(parent: self, context: nil))
|
453
518
|
when Dialect
|
454
|
-
|
519
|
+
value
|
455
520
|
else
|
456
521
|
warn "#{type} has invalid property 'dialect' (#{value.inspect}): expected a URL or object"
|
457
522
|
nil
|
@@ -460,16 +525,18 @@ module RDF::Tabular
|
|
460
525
|
|
461
526
|
# Set new datatype
|
462
527
|
# @return [Dialect]
|
528
|
+
# @raise [Error] if datatype is not valid
|
463
529
|
def datatype=(value)
|
464
530
|
val = case value
|
465
531
|
when Hash then Datatype.new(value, parent: self)
|
466
532
|
else Datatype.new({base: value}, parent: self)
|
467
533
|
end
|
468
534
|
|
469
|
-
if val.valid?
|
535
|
+
if val.valid? || value.is_a?(Hash)
|
536
|
+
# Set it if it was specified as an object, which may cause validation errors later
|
470
537
|
object[:datatype] = val
|
471
538
|
else
|
472
|
-
warn "#{type} has invalid property 'datatype': expected a
|
539
|
+
warn "#{type} has invalid property 'datatype': expected a built-in or an object"
|
473
540
|
end
|
474
541
|
end
|
475
542
|
|
@@ -538,7 +605,7 @@ module RDF::Tabular
|
|
538
605
|
value = object[key]
|
539
606
|
case key
|
540
607
|
when :base
|
541
|
-
|
608
|
+
errors << "#{type} has invalid base: #{value.inspect}" unless DATATYPES.keys.map(&:to_s).include?(value)
|
542
609
|
when :columns
|
543
610
|
value.each do |v|
|
544
611
|
begin
|
@@ -549,14 +616,20 @@ module RDF::Tabular
|
|
549
616
|
end
|
550
617
|
column_names = value.map(&:name)
|
551
618
|
errors << "#{type} has invalid property '#{key}': must have unique names: #{column_names.inspect}" unless column_names.uniq == column_names
|
552
|
-
when :dialect, :tables, :tableSchema, :transformations
|
619
|
+
when :datatype, :dialect, :tables, :tableSchema, :transformations
|
553
620
|
Array(value).each do |t|
|
554
|
-
|
555
|
-
|
556
|
-
|
557
|
-
|
621
|
+
# Make sure value is of appropriate class
|
622
|
+
if t.is_a?({datatype: Datatype, dialect: Dialect, tables: Table, tableSchema: Schema, transformations: Transformation}[key])
|
623
|
+
begin
|
624
|
+
t.validate!
|
625
|
+
rescue Error => e
|
626
|
+
errors << e.message
|
627
|
+
end
|
628
|
+
else
|
629
|
+
errors << "#{type} has invalid property '#{key}': unexpected value #{value.class.name}"
|
558
630
|
end
|
559
631
|
end
|
632
|
+
errors << "#{type} has invalid property 'tables': must not be empty" if key == :tables && Array(value).empty?
|
560
633
|
when :foreignKeys
|
561
634
|
# An array of foreign key definitions that define how the values from specified columns within this table link to rows within this table or other tables. A foreign key definition is a JSON object with the properties:
|
562
635
|
value.each do |fk|
|
@@ -577,13 +650,13 @@ module RDF::Tabular
|
|
577
650
|
errors << "#{type} has invalid property '#{key}': reference has a schemaReference: #{reference.inspect}"
|
578
651
|
end
|
579
652
|
# resource is the URL of a Table in the TableGroup
|
580
|
-
ref = base.join(reference['resource']).to_s
|
653
|
+
ref = context.base.join(reference['resource']).to_s
|
581
654
|
table = root.is_a?(TableGroup) && root.tables.detect {|t| t.url == ref}
|
582
655
|
errors << "#{type} has invalid property '#{key}': table referenced by #{ref} not found" unless table
|
583
656
|
table.tableSchema if table
|
584
657
|
elsif reference.has_key?('schemaReference')
|
585
658
|
# resource is the @id of a Schema in the TableGroup
|
586
|
-
ref = base.join(reference['schemaReference']).to_s
|
659
|
+
ref = context.base.join(reference['schemaReference']).to_s
|
587
660
|
tables = root.is_a?(TableGroup) ? root.tables.select {|t| t.tableSchema[:@id] == ref} : []
|
588
661
|
case tables.length
|
589
662
|
when 0
|
@@ -608,6 +681,114 @@ module RDF::Tabular
|
|
608
681
|
errors << "#{type} has invalid property '#{key}': reference must be an object #{reference.inspect}"
|
609
682
|
end
|
610
683
|
end
|
684
|
+
when :format
|
685
|
+
case value
|
686
|
+
when Hash
|
687
|
+
# Object form only appropriate for numeric type
|
688
|
+
unless %w(
|
689
|
+
decimal integer long int short byte double float number
|
690
|
+
nonNegativeInteger positiveInteger nonPositiveInteger negativeInteger
|
691
|
+
unsignedLong unsignedInt unsignedShort unsignedByte
|
692
|
+
).include?(self.base)
|
693
|
+
warn "#{type} has invalid property '#{key}': Object form only allowed on string or binary datatypes"
|
694
|
+
object.delete(:format) # act as if not set
|
695
|
+
end
|
696
|
+
|
697
|
+
# Otherwise, if it exists, it's a UAX35 number pattern
|
698
|
+
begin
|
699
|
+
parse_uax35_number(value["pattern"], nil, value.fetch('groupChar', ','), value.fetch('decimalChar', '.'))
|
700
|
+
rescue ArgumentError => e
|
701
|
+
warn "#{type} has invalid property '#{key}' pattern: #{e.message}"
|
702
|
+
object[:format].delete("pattern") # act as if not set
|
703
|
+
end
|
704
|
+
else
|
705
|
+
case self.base
|
706
|
+
when 'boolean'
|
707
|
+
unless value.split("|").length == 2
|
708
|
+
warn "#{type} has invalid property '#{key}': annotation provides the true and false values expected, separated by '|'"
|
709
|
+
object.delete(:format) # act as if not set
|
710
|
+
end
|
711
|
+
when :decimal, :integer, :long, :int, :short, :byte,
|
712
|
+
:nonNegativeInteger, :positiveInteger,
|
713
|
+
:unsignedLong, :unsignedInt, :unsignedShort, :unsignedByte,
|
714
|
+
:nonPositiveInteger, :negativeInteger,
|
715
|
+
:double, :float, :number
|
716
|
+
begin
|
717
|
+
parse_uax35_number(value, nil)
|
718
|
+
rescue ArgumentError => e
|
719
|
+
warn "#{type} has invalid property '#{key}': #{e.message}"
|
720
|
+
object.delete(:format) # act as if not set
|
721
|
+
end
|
722
|
+
when 'date', 'dateTime', 'datetime', 'dateTimeStamp', 'time'
|
723
|
+
# Parse and validate format
|
724
|
+
begin
|
725
|
+
parse_uax35_date(value, nil)
|
726
|
+
rescue ArgumentError => e
|
727
|
+
warn "#{type} has invalid property '#{key}': #{e.message}"
|
728
|
+
object.delete(:format) # act as if not set
|
729
|
+
end
|
730
|
+
else
|
731
|
+
# Otherwise, if it exists, it's a regular expression
|
732
|
+
begin
|
733
|
+
Regexp.compile(value)
|
734
|
+
rescue
|
735
|
+
warn "#{type} has invalid property '#{key}': #{$!.message}"
|
736
|
+
object.delete(:format) # act as if not set
|
737
|
+
end
|
738
|
+
end
|
739
|
+
end
|
740
|
+
when :length, :minLength, :maxLength
|
741
|
+
# Applications must raise an error if both length and minLength are specified and length is less than minLength.
|
742
|
+
# Similarly, applications must raise an error if both length and maxLength are specified and length is greater than maxLength.
|
743
|
+
if object[:length]
|
744
|
+
case key
|
745
|
+
when :minLength
|
746
|
+
errors << "#{type} has invalid property minLength': both length and minLength requires length be greater than or equal to minLength" if object[:length] < value
|
747
|
+
when :maxLength
|
748
|
+
errors << "#{type} has invalid property maxLength': both length and maxLength requires length be less than or equal to maxLength" if object[:length] > value
|
749
|
+
end
|
750
|
+
end
|
751
|
+
|
752
|
+
# Applications must raise an error if minLength and maxLength are both specified and minLength is greater than maxLength.
|
753
|
+
if key == :maxLength && object[:minLength]
|
754
|
+
errors << "#{type} has invalid property #{key}': both minLength and maxLength requires minLength be less than or equal to maxLength" if object[:minLength] > value
|
755
|
+
end
|
756
|
+
|
757
|
+
# Applications must raise an error if length, maxLength, or minLength are specified and the base datatype is not string or one of its subtypes, or a binary type.
|
758
|
+
unless %w(string normalizedString token language Name NMTOKEN hexBinary base64Binary binary).include?(self.base)
|
759
|
+
errors << "#{type} has invalid property '#{key}': only allowed on string or binary datatypes"
|
760
|
+
end
|
761
|
+
when :minimum, :maximum, :minInclusive, :maxInclusive, :minExclusive, :maxExclusive
|
762
|
+
case self.base
|
763
|
+
when 'decimal', 'integer', 'long', 'int', 'short', 'byte', 'double', 'number', 'float',
|
764
|
+
'nonNegativeInteger', 'positiveInteger', 'unsignedLong', 'unsignedInt', 'unsignedShort', 'unsignedByte',
|
765
|
+
'nonPositiveInteger', 'negativeInteger', 'date', 'dateTime', 'datetime', 'dateTimeStamp', 'time',
|
766
|
+
'duration', 'dayTimeDuration', 'yearMonthDuration'
|
767
|
+
errors << "#{type} has invalid property '#{key}': #{value.to_ntriples} is not a valid #{self.base}" unless value.valid?
|
768
|
+
|
769
|
+
case key
|
770
|
+
when :minInclusive
|
771
|
+
# Applications MUST raise an error if both minInclusive and minExclusive are specified
|
772
|
+
errors << "#{type} cannot specify both minInclusive and minExclusive" if self.minExclusive
|
773
|
+
|
774
|
+
# Applications MUST raise an error if both minInclusive and maxInclusive are specified and maxInclusive is less than minInclusive
|
775
|
+
errors << "#{type} maxInclusive < minInclusive" if self.maxInclusive && self.maxInclusive < value
|
776
|
+
|
777
|
+
# Applications MUST raise an error if both minInclusive and maxExclusive are specified and maxExclusive is less than or equal to minInclusive
|
778
|
+
errors << "#{type} maxExclusive <= minInclusive" if self.maxExclusive && self.maxExclusive <= value
|
779
|
+
when :maxInclusive
|
780
|
+
# Applications MUST raise an error if both maxInclusive and maxExclusive are specified
|
781
|
+
errors << "#{type} cannot specify both maInclusive and maxExclusive" if self.maxExclusive
|
782
|
+
when :minExclusive
|
783
|
+
# Applications MUST raise an error if both minExclusive and maxExclusive are specified and maxExclusive is less than minExclusive
|
784
|
+
errors << "#{type} minExclusive < maxExclusive" if self.maxExclusive && self.maxExclusive < value
|
785
|
+
|
786
|
+
# Applications MUST raise an error if both minExclusive and maxInclusive are specified and maxInclusive is less than or equal to minExclusive
|
787
|
+
errors << "#{type} maxInclusive < minExclusive" if self.maxInclusive && self.maxInclusive <= value
|
788
|
+
end
|
789
|
+
else
|
790
|
+
errors << "#{type} has invalid property '#{key}': only allowed on numeric, date/time or duration datatypes"
|
791
|
+
end
|
611
792
|
when :notes
|
612
793
|
unless value.is_a?(Hash) || value.is_a?(Array)
|
613
794
|
errors << "#{type} has invalid property '#{key}': #{value}, Object or Array"
|
@@ -617,7 +798,7 @@ module RDF::Tabular
|
|
617
798
|
rescue Error => e
|
618
799
|
errors << "#{type} has invalid content '#{key}': #{e.message}"
|
619
800
|
end
|
620
|
-
when :primaryKey
|
801
|
+
when :primaryKey, :rowTitles
|
621
802
|
# A column reference property that holds either a single reference to a column description object or an array of references.
|
622
803
|
"#{type} has invalid property '#{key}': no column references found" unless Array(value).length > 0
|
623
804
|
Array(value).each do |k|
|
@@ -628,9 +809,18 @@ module RDF::Tabular
|
|
628
809
|
when :@id
|
629
810
|
# Must not be a BNode
|
630
811
|
if value.to_s.start_with?("_:")
|
631
|
-
errors << "#{type} has invalid property '#{key}': #{value.inspect}, must not start with '_:"
|
812
|
+
errors << "#{type} has invalid property '#{key}': #{value.inspect}, must not start with '_:'"
|
813
|
+
end
|
814
|
+
|
815
|
+
# Datatype @id MUST NOT be the URL of a built-in type
|
816
|
+
if self.is_a?(Datatype) && DATATYPES.values.include?(value)
|
817
|
+
errors << "#{type} has invalid property '#{key}': #{value.inspect}, must not be the URL of a built-in datatype"
|
632
818
|
end
|
633
819
|
when :@type
|
820
|
+
# Must not be a BNode
|
821
|
+
if value.to_s.start_with?("_:")
|
822
|
+
errors << "#{type} has invalid property '#{key}': #{value.inspect}, must not start with '_:'"
|
823
|
+
end
|
634
824
|
unless value.to_sym == type
|
635
825
|
errors << "#{type} has invalid property '#{key}': #{value.inspect}, expected #{type}"
|
636
826
|
end
|
@@ -690,7 +880,9 @@ module RDF::Tabular
|
|
690
880
|
next
|
691
881
|
end
|
692
882
|
number += 1
|
693
|
-
|
883
|
+
row = Row.new(data, self, number, number + skipped, @options)
|
884
|
+
(self.object[:rows] ||= []) << row if @options[:validate] # Keep track of rows when validating
|
885
|
+
yield(row)
|
694
886
|
end
|
695
887
|
end
|
696
888
|
|
@@ -775,18 +967,39 @@ module RDF::Tabular
|
|
775
967
|
object.keys.any? {|k| k.to_s.include?(':')}
|
776
968
|
end
|
777
969
|
|
970
|
+
# Does this metadata describe the file (URL)?
|
971
|
+
# @param [RDF::URI] url
|
972
|
+
# @return [Boolean]
|
973
|
+
def describes_file?(url)
|
974
|
+
case self
|
975
|
+
when TableGroup
|
976
|
+
tables.any? {|t| t.url == url}
|
977
|
+
else
|
978
|
+
self.url == url
|
979
|
+
end
|
980
|
+
end
|
981
|
+
|
778
982
|
# Verify that the metadata we're using is compatible with embedded metadata
|
779
983
|
# @param [Table] other
|
780
984
|
# @raise [Error] if not compatible
|
781
985
|
def verify_compatible!(other)
|
782
986
|
if self.is_a?(TableGroup)
|
783
987
|
unless tables.any? {|t| t.url == other.url && t.verify_compatible!(other)}
|
784
|
-
|
988
|
+
if @options[:validate]
|
989
|
+
raise Error, "TableGroups must have Table with matching url #{tables.map(&:url).inspect} vs #{other.url.inspect}"
|
990
|
+
else
|
991
|
+
warn "TableGroups must have Table with matching url #{tables.map(&:url).inspect} vs #{other.url.inspect}"
|
992
|
+
end
|
785
993
|
end
|
786
994
|
else
|
787
995
|
# Tables must have the same url
|
788
|
-
|
789
|
-
|
996
|
+
unless url == other.url
|
997
|
+
if @options[:validate]
|
998
|
+
raise Error, "Tables must have the same url: #{url.inspect} vs #{other.url.inspect}}"
|
999
|
+
else
|
1000
|
+
warn "Tables must have the same url: #{url.inspect} vs #{other.url.inspect}}"
|
1001
|
+
end
|
1002
|
+
end
|
790
1003
|
|
791
1004
|
# Each column description within B MUST match the corresponding column description in A for non-virtual columns
|
792
1005
|
non_virtual_columns = Array(tableSchema.columns).reject(&:virtual)
|
@@ -798,23 +1011,41 @@ module RDF::Tabular
|
|
798
1011
|
index = 0
|
799
1012
|
object_columns.all? do |cb|
|
800
1013
|
ca = non_virtual_columns[index]
|
801
|
-
|
802
|
-
|
803
|
-
|
804
|
-
|
805
|
-
|
806
|
-
|
807
|
-
|
808
|
-
|
809
|
-
|
810
|
-
|
811
|
-
|
812
|
-
|
813
|
-
|
814
|
-
|
815
|
-
|
816
|
-
|
817
|
-
|
1014
|
+
ta = ca.titles || {}
|
1015
|
+
tb = cb.titles || {}
|
1016
|
+
if !ca.object.has_key?(:name) && !cb.object.has_key?(:name) && ta.empty? && tb.empty?
|
1017
|
+
true
|
1018
|
+
elsif ca.object.has_key?(:name) && cb.object.has_key?(:name)
|
1019
|
+
raise Error, "Columns don't match: ca: #{ca.inspect}, cb: #{cb.inspect}" unless ca.name == cb.name
|
1020
|
+
elsif @options[:validate] || !ta.empty? && !tb.empty?
|
1021
|
+
# If validating, column compatibility requires strict match between titles
|
1022
|
+
titles_match = case
|
1023
|
+
when Array(ta['und']).any? {|t| tb.values.flatten.compact.include?(t)}
|
1024
|
+
true
|
1025
|
+
when Array(tb['und']).any? {|t| ta.values.flatten.compact.include?(t)}
|
1026
|
+
true
|
1027
|
+
when ta.any? {|lang, values| !(Array(tb[lang]) & Array(values)).empty?}
|
1028
|
+
# Match on title and language
|
1029
|
+
true
|
1030
|
+
else
|
1031
|
+
# Match if a language from ta is a prefix of a language from tb with matching titles
|
1032
|
+
ta.any? do |la, values|
|
1033
|
+
tb.keys.any? do |lb|
|
1034
|
+
(la.start_with?(lb) || lb.start_with?(la)) && !(Array(tb[lb]) & Array(values)).empty?
|
1035
|
+
end
|
1036
|
+
end
|
1037
|
+
end
|
1038
|
+
|
1039
|
+
if titles_match
|
1040
|
+
true
|
1041
|
+
elsif !@options[:validate]
|
1042
|
+
# If not validating, columns don't match, but processing continues
|
1043
|
+
warn "Columns don't match: ca: #{ca.inspect}, cb: #{cb.inspect}"
|
1044
|
+
true
|
1045
|
+
else
|
1046
|
+
raise Error, "Columns don't match: ca: #{ca.inspect}, cb: #{cb.inspect}"
|
1047
|
+
end
|
1048
|
+
end
|
818
1049
|
index += 1
|
819
1050
|
end
|
820
1051
|
end
|
@@ -822,7 +1053,7 @@ module RDF::Tabular
|
|
822
1053
|
end
|
823
1054
|
|
824
1055
|
def inspect
|
825
|
-
self.class.name + object.inspect
|
1056
|
+
self.class.name + (respond_to?(:to_atd) ? to_atd : object).inspect
|
826
1057
|
end
|
827
1058
|
|
828
1059
|
# Proxy to @object
|
@@ -830,7 +1061,7 @@ module RDF::Tabular
|
|
830
1061
|
def []=(key, value); object[key] = value; end
|
831
1062
|
def each(&block); object.each(&block); end
|
832
1063
|
def ==(other)
|
833
|
-
object == (other.is_a?(Hash) ? other : other.object)
|
1064
|
+
object == (other.is_a?(Hash) ? other : (other.respond_to?(:object) ? other.object : other))
|
834
1065
|
end
|
835
1066
|
def to_json(args=nil); object.to_json(args); end
|
836
1067
|
|
@@ -845,8 +1076,6 @@ module RDF::Tabular
|
|
845
1076
|
normalize_jsonld(key, value)
|
846
1077
|
when ->(k) {key.to_s == '@context'}
|
847
1078
|
"http://www.w3.org/ns/csvw"
|
848
|
-
when :link
|
849
|
-
base.join(value).to_s
|
850
1079
|
when :array
|
851
1080
|
value = [value] unless value.is_a?(Array)
|
852
1081
|
value.map do |v|
|
@@ -854,13 +1083,15 @@ module RDF::Tabular
|
|
854
1083
|
v.normalize!
|
855
1084
|
elsif v.is_a?(Hash) && (ref = v["reference"]).is_a?(Hash)
|
856
1085
|
# SPEC SUGGESTION: special case for foreignKeys
|
857
|
-
ref["resource"] = base.join(ref["resource"]).to_s if ref["resource"]
|
858
|
-
ref["schemaReference"] = base.join(ref["schemaReference"]).to_s if ref["schemaReference"]
|
1086
|
+
ref["resource"] = context.base.join(ref["resource"]).to_s if ref["resource"]
|
1087
|
+
ref["schemaReference"] = context.base.join(ref["schemaReference"]).to_s if ref["schemaReference"]
|
859
1088
|
v
|
860
1089
|
else
|
861
1090
|
v
|
862
1091
|
end
|
863
1092
|
end
|
1093
|
+
when :link
|
1094
|
+
context.base.join(value).to_s
|
864
1095
|
when :object
|
865
1096
|
case value
|
866
1097
|
when Metadata then value.normalize!
|
@@ -872,6 +1103,14 @@ module RDF::Tabular
|
|
872
1103
|
end
|
873
1104
|
when :natural_language
|
874
1105
|
value.is_a?(Hash) ? value : {(context.default_language || 'und') => Array(value)}
|
1106
|
+
when :atomic
|
1107
|
+
case key
|
1108
|
+
when :minimum, :maximum, :minInclusive, :maxInclusive, :minExclusive, :maxExclusive
|
1109
|
+
# Convert to a typed literal based on `base`. This will be validated later
|
1110
|
+
RDF::Literal(value, datatype: DATATYPES[self.base.to_sym])
|
1111
|
+
else
|
1112
|
+
value
|
1113
|
+
end
|
875
1114
|
else
|
876
1115
|
value
|
877
1116
|
end
|
@@ -901,10 +1140,10 @@ module RDF::Tabular
|
|
901
1140
|
raise Error, "Value object may not contain keys other than @value, @type, or @language: #{value.to_json}"
|
902
1141
|
elsif (value.keys.sort & %w(@language @type)) == %w(@language @type)
|
903
1142
|
raise Error, "Value object may not contain both @type and @language: #{value.to_json}"
|
904
|
-
elsif value['@language'] && !BCP47::Language.identify(value['@language'])
|
905
|
-
warn "Value object with @language must use valid language: #{value.to_json}"
|
1143
|
+
elsif value['@language'] && !BCP47::Language.identify(value['@language'].to_s)
|
1144
|
+
warn "Value object with @language must use valid language: #{value.to_json}"
|
906
1145
|
value.delete('@language')
|
907
|
-
elsif value['@type'] && !context.expand_iri(value['@type'], vocab: true).absolute?
|
1146
|
+
elsif value['@type'] && (value['@type'].start_with?('_:') || !context.expand_iri(value['@type'], vocab: true).absolute?)
|
908
1147
|
raise Error, "Value object with @type must defined type: #{value.to_json}"
|
909
1148
|
end
|
910
1149
|
value
|
@@ -919,7 +1158,7 @@ module RDF::Tabular
|
|
919
1158
|
Array(v).each do |vv|
|
920
1159
|
# Validate that all type values transform to absolute IRIs
|
921
1160
|
resource = context.expand_iri(vv, vocab: true)
|
922
|
-
raise Error, "Invalid type #{vv} in JSON-LD context" unless resource.
|
1161
|
+
raise Error, "Invalid type #{vv} in JSON-LD context" unless resource.is_a?(RDF::URI) && resource.absolute?
|
923
1162
|
end
|
924
1163
|
nv[k] = v
|
925
1164
|
when /^(@|_:)/
|
@@ -981,13 +1220,6 @@ module RDF::Tabular
|
|
981
1220
|
end
|
982
1221
|
end
|
983
1222
|
|
984
|
-
def inherited_property_value(method)
|
985
|
-
# Inherited properties
|
986
|
-
object.fetch(method.to_sym) do
|
987
|
-
parent.send(method) if parent
|
988
|
-
end
|
989
|
-
end
|
990
|
-
|
991
1223
|
def default_value(prop)
|
992
1224
|
self.class.const_get(:DEFAULTS).merge(INHERITED_DEFAULTS)[prop]
|
993
1225
|
end
|
@@ -1033,24 +1265,30 @@ module RDF::Tabular
|
|
1033
1265
|
transformations: :array,
|
1034
1266
|
}.freeze
|
1035
1267
|
DEFAULTS = {
|
1036
|
-
tableDirection: "
|
1268
|
+
tableDirection: "auto".freeze,
|
1037
1269
|
}.freeze
|
1038
1270
|
REQUIRED = [:tables].freeze
|
1039
1271
|
|
1040
|
-
# Setters
|
1272
|
+
# Getters and Setters
|
1041
1273
|
PROPERTIES.each do |key, type|
|
1042
|
-
next if [:
|
1274
|
+
next if [:dialect].include?(key)
|
1275
|
+
|
1276
|
+
define_method(key) do
|
1277
|
+
object.fetch(key, DEFAULTS[key])
|
1278
|
+
end
|
1279
|
+
|
1280
|
+
next if [:tables, :tableSchema, :transformations].include?(key)
|
1043
1281
|
define_method("#{key}=".to_sym) do |value|
|
1044
1282
|
invalid = case key
|
1045
1283
|
when :tableDirection
|
1046
|
-
"rtl, ltr, or
|
1284
|
+
"rtl, ltr, or auto" unless %(rtl ltr auto).include?(value)
|
1047
1285
|
when :notes, :tables, :tableSchema, :dialect, :transformations
|
1048
1286
|
# We handle this through separate setters
|
1049
1287
|
end
|
1050
1288
|
|
1051
1289
|
if invalid
|
1052
1290
|
warn "#{type} has invalid property '#{key}' (#{value.inspect}): expected #{invalid}"
|
1053
|
-
object
|
1291
|
+
object.delete(key)
|
1054
1292
|
else
|
1055
1293
|
object[key] = value
|
1056
1294
|
end
|
@@ -1063,15 +1301,6 @@ module RDF::Tabular
|
|
1063
1301
|
super || tables.any? {|t| t.has_annotations? }
|
1064
1302
|
end
|
1065
1303
|
|
1066
|
-
# Logic for accessing elements as accessors
|
1067
|
-
def method_missing(method, *args)
|
1068
|
-
if INHERITED_PROPERTIES.has_key?(method.to_sym)
|
1069
|
-
inherited_property_value(method.to_sym)
|
1070
|
-
else
|
1071
|
-
PROPERTIES.has_key?(method.to_sym) ? object[method.to_sym] : super
|
1072
|
-
end
|
1073
|
-
end
|
1074
|
-
|
1075
1304
|
##
|
1076
1305
|
# Iterate over all tables
|
1077
1306
|
# @yield [Table]
|
@@ -1102,7 +1331,7 @@ module RDF::Tabular
|
|
1102
1331
|
object.inject({
|
1103
1332
|
"@id" => (id.to_s if id),
|
1104
1333
|
"@type" => "AnnotatedTableGroup",
|
1105
|
-
"tables" =>
|
1334
|
+
"tables" => Array(self.tables).map(&:to_atd)
|
1106
1335
|
}) do |memo, (k, v)|
|
1107
1336
|
memo[k.to_s] ||= v
|
1108
1337
|
memo
|
@@ -1124,32 +1353,37 @@ module RDF::Tabular
|
|
1124
1353
|
}.freeze
|
1125
1354
|
DEFAULTS = {
|
1126
1355
|
suppressOutput: false,
|
1127
|
-
tableDirection: "
|
1356
|
+
tableDirection: "auto".freeze,
|
1128
1357
|
}.freeze
|
1129
1358
|
REQUIRED = [:url].freeze
|
1130
1359
|
|
1131
|
-
# Setters
|
1360
|
+
# Getters and Setters
|
1132
1361
|
PROPERTIES.each do |key, type|
|
1133
|
-
next if [:
|
1362
|
+
next if [:dialect, :url].include?(key)
|
1363
|
+
define_method(key) do
|
1364
|
+
object.fetch(key, DEFAULTS[key])
|
1365
|
+
end
|
1366
|
+
|
1367
|
+
next if [:tableSchema, :transformations].include?(key)
|
1134
1368
|
define_method("#{key}=".to_sym) do |value|
|
1135
1369
|
invalid = case key
|
1136
1370
|
when :suppressOutput
|
1137
1371
|
"boolean true or false" unless value.is_a?(TrueClass) || value.is_a?(FalseClass)
|
1138
1372
|
when :tableDirection
|
1139
|
-
"rtl, ltr, or
|
1373
|
+
"rtl, ltr, or auto" unless %(rtl ltr auto).include?(value)
|
1140
1374
|
when :url
|
1141
|
-
"valid URL" unless value.is_a?(String) && base.join(value).valid?
|
1375
|
+
"valid URL" unless value.is_a?(String) && context.base.join(value).valid?
|
1142
1376
|
when :notes, :tableSchema, :dialect, :transformations
|
1143
1377
|
# We handle this through separate setters
|
1144
1378
|
end
|
1145
1379
|
|
1146
1380
|
if invalid
|
1147
1381
|
warn "#{type} has invalid property '#{key}' (#{value.inspect}): expected #{invalid}"
|
1148
|
-
object
|
1382
|
+
object.delete(key)
|
1149
1383
|
elsif key == :url
|
1150
1384
|
# URL of CSV relative to metadata
|
1151
1385
|
object[:url] = value
|
1152
|
-
@url = base.join(value)
|
1386
|
+
@url = context.base.join(value)
|
1153
1387
|
@context.base = @url if @context # Use as base for expanding IRIs
|
1154
1388
|
else
|
1155
1389
|
object[key] = value
|
@@ -1180,22 +1414,13 @@ module RDF::Tabular
|
|
1180
1414
|
"@id" => (id.to_s if id),
|
1181
1415
|
"@type" => "AnnotatedTable",
|
1182
1416
|
"url" => self.url.to_s,
|
1183
|
-
"columns" => tableSchema.columns.map(&:to_atd),
|
1417
|
+
"columns" => Array(tableSchema ? tableSchema.columns : []).map(&:to_atd),
|
1184
1418
|
"rows" => []
|
1185
1419
|
}) do |memo, (k, v)|
|
1186
1420
|
memo[k.to_s] ||= v
|
1187
1421
|
memo
|
1188
1422
|
end.delete_if {|k,v| v.nil? || v.is_a?(Metadata) || k.to_s == "@context"}
|
1189
1423
|
end
|
1190
|
-
|
1191
|
-
# Logic for accessing elements as accessors
|
1192
|
-
def method_missing(method, *args)
|
1193
|
-
if INHERITED_PROPERTIES.has_key?(method.to_sym)
|
1194
|
-
inherited_property_value(method.to_sym)
|
1195
|
-
else
|
1196
|
-
PROPERTIES.has_key?(method.to_sym) ? object[method.to_sym] : super
|
1197
|
-
end
|
1198
|
-
end
|
1199
1424
|
end
|
1200
1425
|
|
1201
1426
|
class Schema < Metadata
|
@@ -1205,21 +1430,26 @@ module RDF::Tabular
|
|
1205
1430
|
columns: :array,
|
1206
1431
|
foreignKeys: :array,
|
1207
1432
|
primaryKey: :column_reference,
|
1433
|
+
rowTitles: :column_reference,
|
1208
1434
|
}.freeze
|
1209
1435
|
DEFAULTS = {}.freeze
|
1210
1436
|
REQUIRED = [].freeze
|
1211
1437
|
|
1212
|
-
# Setters
|
1438
|
+
# Getters and Setters
|
1213
1439
|
PROPERTIES.each do |key, type|
|
1440
|
+
define_method(key) do
|
1441
|
+
object.fetch(key, DEFAULTS[key])
|
1442
|
+
end
|
1443
|
+
|
1214
1444
|
define_method("#{key}=".to_sym) do |value|
|
1215
1445
|
invalid = case key
|
1216
|
-
when :primaryKey
|
1446
|
+
when :primaryKey, :rowTitles
|
1217
1447
|
"string or array of strings" unless !value.is_a?(Hash) && Array(value).all? {|v| v.is_a?(String)}
|
1218
1448
|
end
|
1219
1449
|
|
1220
1450
|
if invalid
|
1221
1451
|
warn "#{type} has invalid property '#{key}' (#{value.inspect}): expected #{invalid}"
|
1222
|
-
object
|
1452
|
+
object.delete(key)
|
1223
1453
|
else
|
1224
1454
|
object[key] = value
|
1225
1455
|
end
|
@@ -1270,12 +1500,21 @@ module RDF::Tabular
|
|
1270
1500
|
end
|
1271
1501
|
end
|
1272
1502
|
|
1273
|
-
|
1274
|
-
|
1275
|
-
|
1276
|
-
|
1277
|
-
|
1278
|
-
|
1503
|
+
##
|
1504
|
+
# List of foreign keys referencing the specified table
|
1505
|
+
#
|
1506
|
+
# @param [Table] table
|
1507
|
+
# @return [Array<Hash>]
|
1508
|
+
def foreign_keys_referencing(table)
|
1509
|
+
Array(foreignKeys).select do |fk|
|
1510
|
+
reference = fk['reference']
|
1511
|
+
if reference['resource']
|
1512
|
+
ref = context.base.join(reference['resource']).to_s
|
1513
|
+
table.url == ref
|
1514
|
+
else # schemaReference
|
1515
|
+
ref = context.base.join(reference['schemaReference']).to_s
|
1516
|
+
table.tableSchema.id == ref
|
1517
|
+
end
|
1279
1518
|
end
|
1280
1519
|
end
|
1281
1520
|
end
|
@@ -1321,8 +1560,12 @@ module RDF::Tabular
|
|
1321
1560
|
super || columns.any? {|c| c.has_annotations? }
|
1322
1561
|
end
|
1323
1562
|
|
1324
|
-
# Setters
|
1563
|
+
# Getters and Setters
|
1325
1564
|
PROPERTIES.each do |key, t|
|
1565
|
+
define_method(key) do
|
1566
|
+
object.fetch(key, DEFAULTS[key])
|
1567
|
+
end
|
1568
|
+
|
1326
1569
|
define_method("#{key}=".to_sym) do |value|
|
1327
1570
|
invalid = case key
|
1328
1571
|
when :name
|
@@ -1339,7 +1582,7 @@ module RDF::Tabular
|
|
1339
1582
|
object.delete(key) if object[key].nil?
|
1340
1583
|
elsif invalid
|
1341
1584
|
warn "#{type} has invalid property '#{key}' (#{value.inspect}): expected #{invalid}"
|
1342
|
-
object
|
1585
|
+
object.delete(key)
|
1343
1586
|
else
|
1344
1587
|
object[key] = value
|
1345
1588
|
end
|
@@ -1360,7 +1603,7 @@ module RDF::Tabular
|
|
1360
1603
|
# @return [RDF::URI]
|
1361
1604
|
def id;
|
1362
1605
|
url = table ? table.url : RDF::URI("")
|
1363
|
-
url + "#col=#{self.sourceNumber}";
|
1606
|
+
url.to_s + "#col=#{self.sourceNumber}";
|
1364
1607
|
end
|
1365
1608
|
|
1366
1609
|
# Return Annotated Column representation
|
@@ -1380,15 +1623,6 @@ module RDF::Tabular
|
|
1380
1623
|
memo
|
1381
1624
|
end.delete_if {|k,v| v.nil?}
|
1382
1625
|
end
|
1383
|
-
|
1384
|
-
# Logic for accessing elements as accessors
|
1385
|
-
def method_missing(method, *args)
|
1386
|
-
if INHERITED_PROPERTIES.has_key?(method.to_sym)
|
1387
|
-
inherited_property_value(method.to_sym)
|
1388
|
-
else
|
1389
|
-
PROPERTIES.has_key?(method.to_sym) ? object[method.to_sym] : super
|
1390
|
-
end
|
1391
|
-
end
|
1392
1626
|
end
|
1393
1627
|
|
1394
1628
|
class Transformation < Metadata
|
@@ -1404,8 +1638,13 @@ module RDF::Tabular
|
|
1404
1638
|
DEFAULTS = {}.freeze
|
1405
1639
|
REQUIRED = %w(url targetFormat scriptFormat).map(&:to_sym).freeze
|
1406
1640
|
|
1407
|
-
# Setters
|
1641
|
+
# Getters and Setters
|
1408
1642
|
PROPERTIES.each do |key, type|
|
1643
|
+
next if [:url].include?(key)
|
1644
|
+
define_method(key) do
|
1645
|
+
object.fetch(key, DEFAULTS[key])
|
1646
|
+
end
|
1647
|
+
|
1409
1648
|
define_method("#{key}=".to_sym) do |value|
|
1410
1649
|
invalid = case key
|
1411
1650
|
when :scriptFormat, :targetFormat
|
@@ -1416,17 +1655,12 @@ module RDF::Tabular
|
|
1416
1655
|
|
1417
1656
|
if invalid
|
1418
1657
|
warn "#{type} has invalid property '#{key}' (#{value.inspect}): expected #{invalid}"
|
1419
|
-
object
|
1658
|
+
object.delete(key)
|
1420
1659
|
else
|
1421
1660
|
object[key] = value
|
1422
1661
|
end
|
1423
1662
|
end
|
1424
1663
|
end
|
1425
|
-
|
1426
|
-
# Logic for accessing elements as accessors
|
1427
|
-
def method_missing(method, *args)
|
1428
|
-
PROPERTIES.has_key?(method.to_sym) ? object[method.to_sym] : super
|
1429
|
-
end
|
1430
1664
|
end
|
1431
1665
|
|
1432
1666
|
class Dialect < Metadata
|
@@ -1444,7 +1678,7 @@ module RDF::Tabular
|
|
1444
1678
|
skipColumns: 0,
|
1445
1679
|
skipInitialSpace: false,
|
1446
1680
|
skipRows: 0,
|
1447
|
-
trim:
|
1681
|
+
trim: true
|
1448
1682
|
}.freeze
|
1449
1683
|
|
1450
1684
|
PROPERTIES = {
|
@@ -1467,13 +1701,15 @@ module RDF::Tabular
|
|
1467
1701
|
|
1468
1702
|
REQUIRED = [].freeze
|
1469
1703
|
|
1470
|
-
# Setters
|
1704
|
+
# Getters and Setters
|
1471
1705
|
PROPERTIES.keys.each do |key|
|
1706
|
+
define_method(key) do
|
1707
|
+
object.fetch(key, DEFAULTS[key])
|
1708
|
+
end
|
1709
|
+
|
1472
1710
|
define_method("#{key}=".to_sym) do |value|
|
1473
1711
|
invalid = case key
|
1474
|
-
when :commentPrefix, :delimiter, :quoteChar
|
1475
|
-
"a single character string" unless value.is_a?(String) && value.length == 1
|
1476
|
-
when :lineTerminators
|
1712
|
+
when :commentPrefix, :delimiter, :quoteChar, :lineTerminators
|
1477
1713
|
"a string" unless value.is_a?(String)
|
1478
1714
|
when :doubleQuote, :header, :skipInitialSpace, :skipBlankRows
|
1479
1715
|
"boolean true or false" unless value.is_a?(TrueClass) || value.is_a?(FalseClass)
|
@@ -1493,7 +1729,7 @@ module RDF::Tabular
|
|
1493
1729
|
object.delete(key) if object[key].nil?
|
1494
1730
|
elsif invalid
|
1495
1731
|
warn "#{type} has invalid property '#{key}' (#{value.inspect}): expected #{invalid}"
|
1496
|
-
object
|
1732
|
+
object.delete(key)
|
1497
1733
|
else
|
1498
1734
|
object[key] = value
|
1499
1735
|
end
|
@@ -1515,7 +1751,7 @@ module RDF::Tabular
|
|
1515
1751
|
# default for trim comes from skipInitialSpace
|
1516
1752
|
# @return [Boolean, String]
|
1517
1753
|
def trim
|
1518
|
-
object.fetch(:trim, self.skipInitialSpace ? 'start' :
|
1754
|
+
object.fetch(:trim, self.skipInitialSpace ? 'start' : true)
|
1519
1755
|
end
|
1520
1756
|
|
1521
1757
|
##
|
@@ -1546,7 +1782,8 @@ module RDF::Tabular
|
|
1546
1782
|
}
|
1547
1783
|
}
|
1548
1784
|
metadata ||= table # In case the embedded metadata becomes the final metadata
|
1549
|
-
metadata["lang"] = options[:lang] if options[:lang]
|
1785
|
+
lang = metadata["lang"] = options[:lang] if options[:lang]
|
1786
|
+
lang ||= 'und'
|
1550
1787
|
|
1551
1788
|
# Set encoding on input
|
1552
1789
|
csv = ::CSV.new(input, csv_options)
|
@@ -1575,9 +1812,9 @@ module RDF::Tabular
|
|
1575
1812
|
# Initialize titles
|
1576
1813
|
columns = table["tableSchema"]["columns"] ||= []
|
1577
1814
|
column = columns[index - skipCols] ||= {
|
1578
|
-
"titles" => {
|
1815
|
+
"titles" => {lang => []},
|
1579
1816
|
}
|
1580
|
-
column["titles"][
|
1817
|
+
column["titles"][lang] << value
|
1581
1818
|
end
|
1582
1819
|
end
|
1583
1820
|
debug("embedded_metadata") {"table: #{table.inspect}"}
|
@@ -1585,20 +1822,12 @@ module RDF::Tabular
|
|
1585
1822
|
|
1586
1823
|
Table.new(table, options.merge(reason: "load embedded metadata: #{table['@id']}"))
|
1587
1824
|
end
|
1588
|
-
|
1589
|
-
# Logic for accessing elements as accessors
|
1590
|
-
def method_missing(method, *args)
|
1591
|
-
if DEFAULTS.has_key?(method.to_sym)
|
1592
|
-
# As set, or with default
|
1593
|
-
object.fetch(method.to_sym, DEFAULTS[method.to_sym])
|
1594
|
-
else
|
1595
|
-
super
|
1596
|
-
end
|
1597
|
-
end
|
1598
1825
|
end
|
1599
1826
|
|
1600
1827
|
class Datatype < Metadata
|
1601
1828
|
PROPERTIES = {
|
1829
|
+
:@id => :link,
|
1830
|
+
:@type => :atomic,
|
1602
1831
|
base: :atomic,
|
1603
1832
|
format: :atomic,
|
1604
1833
|
length: :atomic,
|
@@ -1612,50 +1841,248 @@ module RDF::Tabular
|
|
1612
1841
|
maxExclusive: :atomic,
|
1613
1842
|
}.freeze
|
1614
1843
|
REQUIRED = [].freeze
|
1615
|
-
DEFAULTS = {
|
1844
|
+
DEFAULTS = {
|
1845
|
+
base: "string"
|
1846
|
+
}.freeze
|
1616
1847
|
|
1617
1848
|
# Override `base` in Metadata
|
1618
1849
|
def base; object[:base]; end
|
1619
1850
|
|
1620
|
-
# Setters
|
1851
|
+
# Getters and Setters
|
1621
1852
|
PROPERTIES.each do |key, type|
|
1853
|
+
define_method(key) do
|
1854
|
+
object.fetch(key, DEFAULTS[key])
|
1855
|
+
end
|
1856
|
+
|
1622
1857
|
define_method("#{key}=".to_sym) do |value|
|
1623
1858
|
invalid = case key
|
1859
|
+
when :base
|
1860
|
+
"built-in datatype" unless DATATYPES.keys.map(&:to_s).include?(value)
|
1624
1861
|
when :minimum, :maximum, :minInclusive, :maxInclusive, :minExclusive, :maxExclusive
|
1625
1862
|
"numeric or valid date/time" unless value.is_a?(Numeric) ||
|
1626
1863
|
RDF::Literal::Date.new(value.to_s).valid? ||
|
1627
1864
|
RDF::Literal::Time.new(value.to_s).valid? ||
|
1628
1865
|
RDF::Literal::DateTime.new(value.to_s).valid?
|
1629
1866
|
when :format
|
1630
|
-
|
1631
|
-
|
1632
|
-
|
1633
|
-
|
1634
|
-
|
1635
|
-
object
|
1867
|
+
case value
|
1868
|
+
when String
|
1869
|
+
nil
|
1870
|
+
when Hash
|
1871
|
+
unless (value.keys.map(&:to_s) - %w(groupChar decimalChar pattern)).empty?
|
1872
|
+
"an object containing only groupChar, decimalChar, and/or pattern"
|
1636
1873
|
end
|
1874
|
+
else
|
1875
|
+
"a string or object"
|
1637
1876
|
end
|
1638
1877
|
when :length, :minLength, :maxLength
|
1639
1878
|
if !(value.is_a?(Numeric) && value.integer? && value >= 0)
|
1640
1879
|
"a non-negative integer"
|
1641
|
-
elsif key != :length && object[:length] && value != object[:length]
|
1642
|
-
# Applications must raise an error if length, maxLength or minLength are specified and the cell value is not a list (ie separator is not specified), a string or one of its subtypes, or a binary value.
|
1643
|
-
"both length and #{key} requires they be equal"
|
1644
1880
|
end
|
1645
1881
|
end
|
1646
1882
|
|
1647
1883
|
if invalid
|
1648
|
-
warn "#{type} has invalid property '#{key}' (#{value.inspect}): expected #{invalid}"
|
1649
|
-
object
|
1884
|
+
warn "#{self.type} has invalid property '#{key}' (#{value.inspect}): expected #{invalid}"
|
1885
|
+
object.delete(key)
|
1650
1886
|
else
|
1651
1887
|
object[key] = value
|
1652
1888
|
end
|
1653
1889
|
end
|
1654
1890
|
end
|
1655
1891
|
|
1656
|
-
|
1657
|
-
|
1658
|
-
|
1892
|
+
##
|
1893
|
+
# Parse the date format (if provided), and match against the value (if provided)
|
1894
|
+
# Otherwise, validate format and raise an error
|
1895
|
+
#
|
1896
|
+
# @param [String] format
|
1897
|
+
# @param [String] value
|
1898
|
+
# @return [String] XMLSchema version of value
|
1899
|
+
# @raise [ArgumentError] if format is not valid, or nil, if value does not match
|
1900
|
+
def parse_uax35_date(format, value)
|
1901
|
+
tz, date_format, time_format = nil, nil, nil
|
1902
|
+
return value unless format
|
1903
|
+
value ||= ""
|
1904
|
+
|
1905
|
+
# Extract tz info
|
1906
|
+
if md = format.match(/^(.*[dyms])+(\s*[xX]{1,5})$/)
|
1907
|
+
format, tz = md[1], md[2]
|
1908
|
+
end
|
1909
|
+
|
1910
|
+
date_format, time_format = format.split(' ')
|
1911
|
+
date_format, time_format = nil, date_format if self.base.to_sym == :time
|
1912
|
+
|
1913
|
+
# Extract date, if specified
|
1914
|
+
date_part = case date_format
|
1915
|
+
when 'yyyy-MM-dd' then value.match(/^(?<yr>\d{4})-(?<mo>\d{2})-(?<da>\d{2})/)
|
1916
|
+
when 'yyyyMMdd' then value.match(/^(?<yr>\d{4})(?<mo>\d{2})(?<da>\d{2})/)
|
1917
|
+
when 'dd-MM-yyyy' then value.match(/^(?<da>\d{2})-(?<mo>\d{2})-(?<yr>\d{4})/)
|
1918
|
+
when 'd-M-yyyy' then value.match(/^(?<da>\d{1,2})-(?<mo>\d{1,2})-(?<yr>\d{4})/)
|
1919
|
+
when 'MM-dd-yyyy' then value.match(/^(?<mo>\d{2})-(?<da>\d{2})-(?<yr>\d{4})/)
|
1920
|
+
when 'M-d-yyyy' then value.match(/^(?<mo>\d{1,2})-(?<da>\d{1,2})-(?<yr>\d{4})/)
|
1921
|
+
when 'dd/MM/yyyy' then value.match(/^(?<da>\d{2})\/(?<mo>\d{2})\/(?<yr>\d{4})/)
|
1922
|
+
when 'd/M/yyyy' then value.match(/^(?<da>\d{1,2})\/(?<mo>\d{1,2})\/(?<yr>\d{4})/)
|
1923
|
+
when 'MM/dd/yyyy' then value.match(/^(?<mo>\d{2})\/(?<da>\d{2})\/(?<yr>\d{4})/)
|
1924
|
+
when 'M/d/yyyy' then value.match(/^(?<mo>\d{1,2})\/(?<da>\d{1,2})\/(?<yr>\d{4})/)
|
1925
|
+
when 'dd.MM.yyyy' then value.match(/^(?<da>\d{2})\.(?<mo>\d{2})\.(?<yr>\d{4})/)
|
1926
|
+
when 'd.M.yyyy' then value.match(/^(?<da>\d{1,2})\.(?<mo>\d{1,2})\.(?<yr>\d{4})/)
|
1927
|
+
when 'MM.dd.yyyy' then value.match(/^(?<mo>\d{2})\.(?<da>\d{2})\.(?<yr>\d{4})/)
|
1928
|
+
when 'M.d.yyyy' then value.match(/^(?<mo>\d{1,2})\.(?<da>\d{1,2})\.(?<yr>\d{4})/)
|
1929
|
+
when 'yyyy-MM-ddTHH:mm' then value.match(/^(?<yr>\d{4})-(?<mo>\d{2})-(?<da>\d{2})T(?<hr>\d{2}):(?<mi>\d{2})(?<se>(?<ms>))/)
|
1930
|
+
when 'yyyy-MM-ddTHH:mm:ss' then value.match(/^(?<yr>\d{4})-(?<mo>\d{2})-(?<da>\d{2})T(?<hr>\d{2}):(?<mi>\d{2}):(?<se>\d{2})(?<ms>)/)
|
1931
|
+
when /yyyy-MM-ddTHH:mm:ss\.S+/
|
1932
|
+
md = value.match(/^(?<yr>\d{4})-(?<mo>\d{2})-(?<da>\d{2})T(?<hr>\d{2}):(?<mi>\d{2}):(?<se>\d{2})\.(?<ms>\d+)/)
|
1933
|
+
num_ms = date_format.match(/S+/).to_s.length
|
1934
|
+
md if md && md[:ms].length <= num_ms
|
1935
|
+
else
|
1936
|
+
raise ArgumentError, "unrecognized date/time format #{date_format}" if date_format
|
1937
|
+
nil
|
1938
|
+
end
|
1939
|
+
|
1940
|
+
# Forward past date part
|
1941
|
+
if date_part
|
1942
|
+
value = value[date_part.to_s.length..-1]
|
1943
|
+
value = value.lstrip if date_part && value.start_with?(' ')
|
1944
|
+
end
|
1945
|
+
|
1946
|
+
# Extract time, if specified
|
1947
|
+
time_part = case time_format
|
1948
|
+
when 'HH:mm:ss' then value.match(/^(?<hr>\d{2}):(?<mi>\d{2}):(?<se>\d{2})(?<ms>)/)
|
1949
|
+
when 'HHmmss' then value.match(/^(?<hr>\d{2})(?<mi>\d{2})(?<se>\d{2})(?<ms>)/)
|
1950
|
+
when 'HH:mm' then value.match(/^(?<hr>\d{2}):(?<mi>\d{2})(?<se>)(?<ms>)/)
|
1951
|
+
when 'HHmm' then value.match(/^(?<hr>\d{2})(?<mi>\d{2})(?<se>)(?<ms>)/)
|
1952
|
+
when /HH:mm:ss\.S+/
|
1953
|
+
md = value.match(/^(?<hr>\d{2}):(?<mi>\d{2}):(?<se>\d{2})\.(?<ms>\d+)/)
|
1954
|
+
num_ms = time_format.match(/S+/).to_s.length
|
1955
|
+
md if md && md[:ms].length <= num_ms
|
1956
|
+
else
|
1957
|
+
raise ArgumentError, "unrecognized date/time format #{time_format}" if time_format
|
1958
|
+
nil
|
1959
|
+
end
|
1960
|
+
|
1961
|
+
# If there's a date_format but no date_part, match fails
|
1962
|
+
return nil if date_format && date_part.nil?
|
1963
|
+
|
1964
|
+
# If there's a time_format but no time_part, match fails
|
1965
|
+
return nil if time_format && time_part.nil?
|
1966
|
+
|
1967
|
+
# Forward past time part
|
1968
|
+
value = value[time_part.to_s.length..-1] if time_part
|
1969
|
+
|
1970
|
+
# Use datetime match for time
|
1971
|
+
time_part = date_part if date_part && date_part.names.include?("hr")
|
1972
|
+
|
1973
|
+
# If there's a timezone, it may optionally start with whitespace
|
1974
|
+
value = value.lstrip if tz.to_s.start_with?(' ')
|
1975
|
+
tz_part = value if tz
|
1976
|
+
|
1977
|
+
# Compose normalized value
|
1978
|
+
vd = ("%04d-%02d-%02d" % [date_part[:yr].to_i, date_part[:mo].to_i, date_part[:da].to_i]) if date_part
|
1979
|
+
vt = ("%02d:%02d:%02d" % [time_part[:hr].to_i, time_part[:mi].to_i, time_part[:se].to_i]) if time_part
|
1980
|
+
|
1981
|
+
# Add milliseconds, if matched
|
1982
|
+
vt += ".#{time_part[:ms]}" if time_part && !time_part[:ms].empty?
|
1983
|
+
|
1984
|
+
value = [vd, vt].compact.join('T')
|
1985
|
+
value += tz_part.to_s
|
1986
|
+
end
|
1987
|
+
|
1988
|
+
##
|
1989
|
+
# Parse the number pattern (if provided), and match against the value (if provided)
|
1990
|
+
# Otherwise, validate format and raise an error
|
1991
|
+
#
|
1992
|
+
# @param [String] pattern
|
1993
|
+
# @param [String] value
|
1994
|
+
# @param [String] groupChar
|
1995
|
+
# @param [String] decimalChar
|
1996
|
+
# @return [String] XMLSchema version of value or nil, if value does not match
|
1997
|
+
# @raise [ArgumentError] if format is not valid
|
1998
|
+
def parse_uax35_number(pattern, value, groupChar=",", decimalChar=".")
|
1999
|
+
return value if pattern.to_s.empty?
|
2000
|
+
value ||= ""
|
2001
|
+
|
2002
|
+
re = build_number_re(pattern, groupChar, decimalChar)
|
2003
|
+
|
2004
|
+
# Upcase value and remove internal spaces
|
2005
|
+
value = value.upcase.gsub(/\s+/, '')
|
2006
|
+
|
2007
|
+
# Remove groupChar from value
|
2008
|
+
value = value.gsub(groupChar, '')
|
2009
|
+
|
2010
|
+
# Replace decimalChar with "."
|
2011
|
+
value = value.gsub(decimalChar, '.')
|
2012
|
+
|
2013
|
+
if value =~ re
|
2014
|
+
# result re-assembles parts removed from value
|
2015
|
+
value
|
2016
|
+
else
|
2017
|
+
# no match
|
2018
|
+
nil
|
2019
|
+
end
|
2020
|
+
end
|
2021
|
+
|
2022
|
+
# Build a regular expression from the provided pattern to match value, after suitable modifications
|
2023
|
+
#
|
2024
|
+
# @param [String] pattern
|
2025
|
+
# @param [String] groupChar
|
2026
|
+
# @param [String] decimalChar
|
2027
|
+
# @return [Regexp] Regular expression matching value
|
2028
|
+
# @raise [ArgumentError] if format is not valid
|
2029
|
+
def build_number_re(pattern, groupChar, decimalChar)
|
2030
|
+
# pattern must be composed of only 0, #, decimalChar, groupChar, E, +, -, %, and ‰
|
2031
|
+
legal_number_pattern = /\A
|
2032
|
+
([%‰])?
|
2033
|
+
([+-])?
|
2034
|
+
# Mantissa
|
2035
|
+
(\#|#{groupChar == '.' ? '\.' : groupChar})*
|
2036
|
+
(0|#{groupChar == '.' ? '\.' : groupChar})*
|
2037
|
+
# Fractional
|
2038
|
+
(?:#{decimalChar == '.' ? '\.' : decimalChar}
|
2039
|
+
(0|#{groupChar == '.' ? '\.' : groupChar})*
|
2040
|
+
(\#|#{groupChar == '.' ? '\.' : groupChar})*
|
2041
|
+
# Exponent
|
2042
|
+
(E
|
2043
|
+
[+-]?
|
2044
|
+
(?:\#|#{groupChar == '.' ? '\.' : groupChar})*
|
2045
|
+
(?:0|#{groupChar == '.' ? '\.' : groupChar})*
|
2046
|
+
)?
|
2047
|
+
)?
|
2048
|
+
([%‰])?
|
2049
|
+
\Z/x
|
2050
|
+
|
2051
|
+
unless pattern =~ legal_number_pattern
|
2052
|
+
raise ArgumentError, "unrecognized number pattern #{pattern}"
|
2053
|
+
end
|
2054
|
+
|
2055
|
+
# Remove groupChar from pattern
|
2056
|
+
pattern = pattern.gsub(groupChar, '')
|
2057
|
+
|
2058
|
+
# Replace decimalChar with "."
|
2059
|
+
pattern = pattern.gsub(decimalChar, '.')
|
2060
|
+
|
2061
|
+
# Split on decimalChar and E
|
2062
|
+
parts = pattern.split(/[\.E]/)
|
2063
|
+
|
2064
|
+
# Construct regular expression
|
2065
|
+
mantissa_str = case parts[0]
|
2066
|
+
when /\A([%‰])?([+-])?#+(0+)([%‰])?\Z/ then "#{$1}#{$2}\\d{#{$3.length},}#{$4}"
|
2067
|
+
when /\A([%‰])?([+-])?(0+)([%‰])?\Z/ then "#{$1}#{$2}\\d{#{$3.length}}#{$4}"
|
2068
|
+
when /\A([%‰])?([+-])?#+([%‰])?\Z/ then "#{$1}#{$2}\\d*#{$4}"
|
2069
|
+
end
|
2070
|
+
|
2071
|
+
fractional_str = case parts[1]
|
2072
|
+
when /\A(0+)(#+)([%‰])?\Z/ then "\\d{#{$1.length},#{$1.length+$2.length}}#{$3}"
|
2073
|
+
when /\A(0+)([%‰])?\Z/ then "\\d{#{$1.length}}#{$2}"
|
2074
|
+
when /\A(#+)([%‰])?\Z/ then "\\d{,#{$1.length}}#{$2}"
|
2075
|
+
end
|
2076
|
+
fractional_str = "\\.#{fractional_str}" if fractional_str
|
2077
|
+
|
2078
|
+
exponent_str = case parts[2]
|
2079
|
+
when /\A([+-])?(#+)(0+)([%‰])?\Z/ then "#{$1}\\d{#{$3.length},#{$2.length+$3.length}}#{$4}"
|
2080
|
+
when /\A([+-])?(0+)([%‰])?\Z/ then "#{$1}\\d{#{$2.length}}#{$3}"
|
2081
|
+
when /\A([+-])?(#+)([%‰])?\Z/ then "#{$1}\\d{,#{$2.length}}#{$3}"
|
2082
|
+
end
|
2083
|
+
exponent_str = "E#{exponent_str}" if exponent_str
|
2084
|
+
|
2085
|
+
Regexp.new("^#{mantissa_str}#{fractional_str}#{exponent_str}$")
|
1659
2086
|
end
|
1660
2087
|
end
|
1661
2088
|
|
@@ -1697,6 +2124,10 @@ module RDF::Tabular
|
|
1697
2124
|
"errors" => self.errors
|
1698
2125
|
}.delete_if {|k,v| Array(v).empty?}
|
1699
2126
|
end
|
2127
|
+
|
2128
|
+
def inspect
|
2129
|
+
self.class.name + to_atd.inspect
|
2130
|
+
end
|
1700
2131
|
end
|
1701
2132
|
|
1702
2133
|
# Row values, hashed by `name`
|
@@ -1715,6 +2146,16 @@ module RDF::Tabular
|
|
1715
2146
|
# @return [Table]
|
1716
2147
|
attr_reader :table
|
1717
2148
|
|
2149
|
+
#
|
2150
|
+
# Cells providing a unique row identifier
|
2151
|
+
# @return [Array<Cell>]
|
2152
|
+
attr_reader :primaryKey
|
2153
|
+
|
2154
|
+
#
|
2155
|
+
# Title(s) of this row
|
2156
|
+
# @return [Array<RDF::Literal>]
|
2157
|
+
attr_reader :titles
|
2158
|
+
|
1718
2159
|
#
|
1719
2160
|
# Context from Table with base set to table URL for expanding URI Templates
|
1720
2161
|
# @return [JSON::LD::Context]
|
@@ -1725,8 +2166,10 @@ module RDF::Tabular
|
|
1725
2166
|
# @param [Metadata] metadata for Table
|
1726
2167
|
# @param [Integer] number 1-based row number after skipped/header rows
|
1727
2168
|
# @param [Integer] source_number 1-based row number from source
|
2169
|
+
# @param [Hash{Symbol => Object}] options ({})
|
2170
|
+
# @option options [Boolean] :validate check for PK/FK consistency
|
1728
2171
|
# @return [Row]
|
1729
|
-
def initialize(row, metadata, number, source_number)
|
2172
|
+
def initialize(row, metadata, number, source_number, options = {})
|
1730
2173
|
@table = metadata
|
1731
2174
|
@number = number
|
1732
2175
|
@sourceNumber = source_number
|
@@ -1748,7 +2191,7 @@ module RDF::Tabular
|
|
1748
2191
|
end
|
1749
2192
|
|
1750
2193
|
# Make sure that the row length is at least as long as the number of column definitions, to implicitly include virtual columns
|
1751
|
-
columns.each_with_index {|c, index| row[index] ||=
|
2194
|
+
columns.each_with_index {|c, index| row[index] ||= c.null}
|
1752
2195
|
|
1753
2196
|
row.each_with_index do |value, index|
|
1754
2197
|
|
@@ -1764,7 +2207,7 @@ module RDF::Tabular
|
|
1764
2207
|
|
1765
2208
|
@values << cell = Cell.new(metadata, column, self, value)
|
1766
2209
|
|
1767
|
-
datatype = column.datatype || Datatype.new(base: "string", parent: column)
|
2210
|
+
datatype = column.datatype || Datatype.new({base: "string"}, parent: column)
|
1768
2211
|
value = value.gsub(/\r\t\a/, ' ') unless %w(string json xml html anyAtomicType any).include?(datatype.base)
|
1769
2212
|
value = value.strip.gsub(/\s+/, ' ') unless %w(string json xml html anyAtomicType any normalizedString).include?(datatype.base)
|
1770
2213
|
# if the resulting string is an empty string, apply the remaining steps to the string given by the default property
|
@@ -1787,23 +2230,35 @@ module RDF::Tabular
|
|
1787
2230
|
v.strip!
|
1788
2231
|
end
|
1789
2232
|
|
1790
|
-
expanded_dt = metadata.context.expand_iri(datatype.base, vocab: true)
|
2233
|
+
expanded_dt = datatype.id || metadata.context.expand_iri(datatype.base, vocab: true)
|
1791
2234
|
if (lit_or_errors = value_matching_datatype(v.dup, datatype, expanded_dt, column.lang)).is_a?(RDF::Literal)
|
1792
2235
|
lit_or_errors
|
1793
2236
|
else
|
1794
2237
|
cell_errors += lit_or_errors
|
1795
|
-
RDF::Literal(v, language: column.lang)
|
2238
|
+
RDF::Literal(v, language: (column.lang unless column.lang == "und"))
|
1796
2239
|
end
|
1797
2240
|
end
|
1798
2241
|
end.compact
|
1799
2242
|
|
2243
|
+
# Check for required values
|
2244
|
+
if column.required && (cell_values.any? {|v| v.to_s.empty?} || cell_values.empty?)
|
2245
|
+
cell_errors << "Required column has empty value(s): #{cell_values.map(&:to_s).inspect}"
|
2246
|
+
end
|
1800
2247
|
cell.value = (column.separator ? cell_values : cell_values.first)
|
1801
2248
|
cell.errors = cell_errors
|
1802
|
-
metadata.send(:debug, "#{self.number}: each_cell ##{self.sourceNumber},#{cell.column.sourceNumber}", cell.errors.join("\n")) unless cell_errors.empty?
|
1803
2249
|
|
1804
2250
|
map_values[columns[index - skipColumns].name] = (column.separator ? cell_values.map(&:to_s) : cell_values.first.to_s)
|
1805
2251
|
end
|
1806
2252
|
|
2253
|
+
# Record primaryKey if validating
|
2254
|
+
@primaryKey = @values.
|
2255
|
+
select {|cell| Array(table.tableSchema.primaryKey).include?(cell.column.name)} if options[:validate]
|
2256
|
+
|
2257
|
+
# Record any row titles
|
2258
|
+
@titles = @values.
|
2259
|
+
select {|cell| Array(table.tableSchema.rowTitles).include?(cell.column.name)}.
|
2260
|
+
map(&:value)
|
2261
|
+
|
1807
2262
|
# Map URLs for row
|
1808
2263
|
@values.each_with_index do |cell, index|
|
1809
2264
|
mapped_values = map_values.merge(
|
@@ -1824,30 +2279,24 @@ module RDF::Tabular
|
|
1824
2279
|
{
|
1825
2280
|
"@id" => id.to_s,
|
1826
2281
|
"@type" => "Row",
|
1827
|
-
"table" => (table.id
|
2282
|
+
"table" => (table.id || table.url),
|
1828
2283
|
"number" => self.number,
|
1829
2284
|
"sourceNumber" => self.sourceNumber,
|
1830
|
-
"cells" => @values.map(&:
|
2285
|
+
"cells" => @values.map(&:value)
|
1831
2286
|
}.delete_if {|k,v| v.nil?}
|
1832
2287
|
end
|
1833
2288
|
|
2289
|
+
def inspect
|
2290
|
+
self.class.name + to_atd.inspect
|
2291
|
+
end
|
2292
|
+
|
1834
2293
|
private
|
1835
2294
|
#
|
1836
2295
|
# given a datatype specification, return a literal matching that specification if found, otherwise the list of errors
|
1837
2296
|
# @return [RDF::Literal]
|
1838
2297
|
def value_matching_datatype(value, datatype, expanded_dt, language)
|
1839
|
-
value_errors = []
|
1840
|
-
|
1841
|
-
# Check constraints
|
1842
|
-
if datatype.length && value.length != datatype.length
|
1843
|
-
value_errors << "#{value} does not have length #{datatype.length}"
|
1844
|
-
end
|
1845
|
-
if datatype.minLength && value.length < datatype.minLength
|
1846
|
-
value_errors << "#{value} does not have length >= #{datatype.minLength}"
|
1847
|
-
end
|
1848
|
-
if datatype.maxLength && value.length > datatype.maxLength
|
1849
|
-
value_errors << "#{value} does not have length <= #{datatype.maxLength}"
|
1850
|
-
end
|
2298
|
+
lit, value_errors = nil, []
|
2299
|
+
original_value = value.dup
|
1851
2300
|
|
1852
2301
|
format = datatype.format
|
1853
2302
|
# Datatype specific constraints and conversions
|
@@ -1857,29 +2306,39 @@ module RDF::Tabular
|
|
1857
2306
|
:unsignedLong, :unsignedInt, :unsignedShort, :unsignedByte,
|
1858
2307
|
:nonPositiveInteger, :negativeInteger,
|
1859
2308
|
:double, :float, :number
|
2309
|
+
|
1860
2310
|
# Normalize representation based on numeric-specific facets
|
1861
|
-
|
1862
|
-
|
1863
|
-
|
1864
|
-
|
1865
|
-
|
2311
|
+
|
2312
|
+
format = case format
|
2313
|
+
when String then {"pattern" => format}
|
2314
|
+
when Hash then format
|
2315
|
+
else {}
|
1866
2316
|
end
|
1867
|
-
|
1868
|
-
|
1869
|
-
|
2317
|
+
|
2318
|
+
groupChar = format["groupChar"]
|
2319
|
+
decimalChar = format["decimalChar"] || '.'
|
2320
|
+
pattern = format["pattern"]
|
2321
|
+
|
2322
|
+
if !datatype.parse_uax35_number(pattern, value, groupChar || ",", decimalChar)
|
2323
|
+
value_errors << "#{value} does not match pattern #{pattern}"
|
1870
2324
|
end
|
1871
|
-
|
1872
|
-
|
2325
|
+
|
2326
|
+
# pattern facet failed
|
2327
|
+
value_errors << "#{value} has repeating #{groupChar.inspect}" if groupChar && value.include?(groupChar*2)
|
2328
|
+
value = value.gsub(groupChar, '') if groupChar
|
2329
|
+
value = value.sub(decimalChar, '.')
|
1873
2330
|
|
1874
2331
|
# Extract percent or per-mille sign
|
1875
2332
|
percent = permille = false
|
1876
|
-
|
1877
|
-
|
1878
|
-
|
1879
|
-
|
1880
|
-
|
1881
|
-
|
1882
|
-
|
2333
|
+
if groupChar
|
2334
|
+
case value
|
2335
|
+
when /%/
|
2336
|
+
value = value.sub('%', '')
|
2337
|
+
percent = true
|
2338
|
+
when /‰/
|
2339
|
+
value = value.sub('‰', '')
|
2340
|
+
permille = true
|
2341
|
+
end
|
1883
2342
|
end
|
1884
2343
|
|
1885
2344
|
lit = RDF::Literal(value, datatype: expanded_dt)
|
@@ -1889,129 +2348,117 @@ module RDF::Tabular
|
|
1889
2348
|
o = o / 1000 if permille
|
1890
2349
|
lit = RDF::Literal(o, datatype: expanded_dt)
|
1891
2350
|
end
|
2351
|
+
|
2352
|
+
if !lit.plain? && datatype.minimum && lit < datatype.minimum
|
2353
|
+
value_errors << "#{value} < minimum #{datatype.minimum}"
|
2354
|
+
end
|
2355
|
+
case
|
2356
|
+
when datatype.minimum && lit < datatype.minimum
|
2357
|
+
value_errors << "#{value} < minimum #{datatype.minimum}"
|
2358
|
+
when datatype.maximum && lit > datatype.maximum
|
2359
|
+
value_errors << "#{value} > maximum #{datatype.maximum}"
|
2360
|
+
when datatype.minInclusive && lit < datatype.minInclusive
|
2361
|
+
value_errors << "#{value} < minInclusive #{datatype.minInclusive}"
|
2362
|
+
when datatype.maxInclusive && lit > datatype.maxInclusive
|
2363
|
+
value_errors << "#{value} > maxInclusive #{datatype.maxInclusive}"
|
2364
|
+
when datatype.minExclusive && lit <= datatype.minExclusive
|
2365
|
+
value_errors << "#{value} <= minExclusive #{datatype.minExclusive}"
|
2366
|
+
when datatype.maxExclusive && lit >= datatype.maxExclusive
|
2367
|
+
value_errors << "#{value} ?= maxExclusive #{datatype.maxExclusive}"
|
2368
|
+
end
|
1892
2369
|
when :boolean
|
1893
|
-
|
2370
|
+
if format
|
1894
2371
|
# True/False determined by Y|N values
|
1895
2372
|
t, f = format.to_s.split('|', 2)
|
1896
2373
|
case
|
1897
2374
|
when value == t
|
1898
|
-
|
2375
|
+
lit = RDF::Literal::TRUE
|
1899
2376
|
when value == f
|
1900
|
-
|
2377
|
+
lit = RDF::Literal::FALSE
|
1901
2378
|
else
|
1902
2379
|
value_errors << "#{value} does not match boolean format #{format}"
|
1903
|
-
RDF::Literal::Boolean.new(value)
|
1904
2380
|
end
|
1905
2381
|
else
|
1906
2382
|
if %w(1 true).include?(value.downcase)
|
1907
|
-
RDF::Literal::TRUE
|
2383
|
+
lit = RDF::Literal::TRUE
|
1908
2384
|
elsif %w(0 false).include?(value.downcase)
|
1909
|
-
RDF::Literal::FALSE
|
2385
|
+
lit = RDF::Literal::FALSE
|
2386
|
+
else
|
2387
|
+
value_errors << "#{value} does not match boolean"
|
1910
2388
|
end
|
1911
2389
|
end
|
1912
2390
|
when :date, :time, :dateTime, :dateTimeStamp, :datetime
|
1913
|
-
|
1914
|
-
|
1915
|
-
|
1916
|
-
|
1917
|
-
if format && (md = format.match(/^(.*[dyms])+(\s*[xX]{1,5})$/))
|
1918
|
-
format, tz = md[1], md[2]
|
2391
|
+
if value = datatype.parse_uax35_date(format, value)
|
2392
|
+
lit = RDF::Literal(value, datatype: expanded_dt)
|
2393
|
+
else
|
2394
|
+
value_errors << "#{original_value} does not match format #{format}"
|
1919
2395
|
end
|
1920
|
-
|
1921
|
-
|
1922
|
-
|
1923
|
-
|
1924
|
-
|
1925
|
-
|
1926
|
-
|
1927
|
-
|
1928
|
-
|
1929
|
-
|
1930
|
-
|
1931
|
-
|
1932
|
-
|
1933
|
-
|
1934
|
-
|
1935
|
-
|
1936
|
-
when 'd/M/yyyy' then value.match(/^(?<da>\d{1,2})\/(?<mo>\d{1,2})\/(?<yr>\d{4})/)
|
1937
|
-
when 'MM/dd/yyyy' then value.match(/^(?<mo>\d{2})\/(?<da>\d{2})\/(?<yr>\d{4})/)
|
1938
|
-
when 'M/d/yyyy' then value.match(/^(?<mo>\d{1,2})\/(?<da>\d{1,2})\/(?<yr>\d{4})/)
|
1939
|
-
when 'dd.MM.yyyy' then value.match(/^(?<da>\d{2})\.(?<mo>\d{2})\.(?<yr>\d{4})/)
|
1940
|
-
when 'd.M.yyyy' then value.match(/^(?<da>\d{1,2})\.(?<mo>\d{1,2})\.(?<yr>\d{4})/)
|
1941
|
-
when 'MM.dd.yyyy' then value.match(/^(?<mo>\d{2})\.(?<da>\d{2})\.(?<yr>\d{4})/)
|
1942
|
-
when 'M.d.yyyy' then value.match(/^(?<mo>\d{1,2})\.(?<da>\d{1,2})\.(?<yr>\d{4})/)
|
1943
|
-
when 'yyyy-MM-ddTHH:mm:ss' then value.match(/^(?<yr>\d{4})-(?<mo>\d{2})-(?<da>\d{2})T(?<hr>\d{2}):(?<mi>\d{2}):(?<se>\d{2})/)
|
1944
|
-
when 'yyyy-MM-ddTHH:mm' then value.match(/^(?<yr>\d{4})-(?<mo>\d{2})-(?<da>\d{2})T(?<hr>\d{2}):(?<mi>\d{2})(?<se>)/)
|
1945
|
-
else
|
1946
|
-
value_errors << "unrecognized date/time format #{date_format}" if date_format
|
1947
|
-
nil
|
2396
|
+
when :duration, :dayTimeDuration, :yearMonthDuration
|
2397
|
+
# SPEC CONFUSION: surely format also includes that for other duration types?
|
2398
|
+
re = Regexp.new(format) rescue nil
|
2399
|
+
if re.nil? ||value.match(re)
|
2400
|
+
lit = RDF::Literal(value, datatype: expanded_dt)
|
2401
|
+
else
|
2402
|
+
value_errors << "#{value} does not match format #{format}"
|
2403
|
+
end
|
2404
|
+
when :hexBinary, :base64Binary
|
2405
|
+
lit = RDF::Literal.new(value, datatype: expanded_dt)
|
2406
|
+
unless lit.valid?
|
2407
|
+
value_errors << "#{value} is invalid"
|
2408
|
+
lit = RDF::Literal.new(value)
|
2409
|
+
else
|
2410
|
+
if datatype.length && lit.object.length != datatype.length
|
2411
|
+
value_errors << "decoded #{value} does not have length #{datatype.length}"
|
1948
2412
|
end
|
1949
|
-
|
1950
|
-
|
1951
|
-
if date_part
|
1952
|
-
value = value[date_part.to_s.length..-1]
|
1953
|
-
value = value.lstrip if date_part && value.start_with?(' ')
|
2413
|
+
if datatype.minLength && lit.object.length < datatype.minLength
|
2414
|
+
value_errors << "decoded #{value} does not have length >= #{datatype.length}"
|
1954
2415
|
end
|
1955
|
-
|
1956
|
-
|
1957
|
-
time_part = case time_format
|
1958
|
-
when 'HH:mm:ss' then value.match(/^(?<hr>\d{2}):(?<mi>\d{2}):(?<se>\d{2})/)
|
1959
|
-
when 'HHmmss' then value.match(/^(?<hr>\d{2})(?<mi>\d{2})(?<se>\d{2})/)
|
1960
|
-
when 'HH:mm' then value.match(/^(?<hr>\d{2}):(?<mi>\d{2})(?<se>)/)
|
1961
|
-
when 'HHmm' then value.match(/^(?<hr>\d{2})(?<mi>\d{2})(?<se>)/)
|
1962
|
-
else
|
1963
|
-
value_errors << "unrecognized date/time format #{time_format}" if time_format
|
1964
|
-
nil
|
2416
|
+
if datatype.maxLength && lit.object.length < datatype.maxLength
|
2417
|
+
value_errors << "decoded #{value} does not have length <= #{datatype.length}"
|
1965
2418
|
end
|
1966
|
-
|
1967
|
-
# Forward past time part
|
1968
|
-
value = value[time_part.to_s.length..-1] if time_part
|
1969
|
-
|
1970
|
-
# Use datetime match for time
|
1971
|
-
time_part = date_part if date_part && date_part.names.include?("hr")
|
1972
|
-
|
1973
|
-
# If there's a timezone, it may optionally start with whitespace
|
1974
|
-
value = value.lstrip if tz.to_s.start_with?(' ')
|
1975
|
-
tz_part = value if tz
|
1976
|
-
|
1977
|
-
# Compose normalized value
|
1978
|
-
vd = ("%04d-%02d-%02d" % [date_part[:yr].to_i, date_part[:mo].to_i, date_part[:da].to_i]) if date_part
|
1979
|
-
vt = ("%02d:%02d:%02d" % [time_part[:hr].to_i, time_part[:mi].to_i, time_part[:se].to_i]) if time_part
|
1980
|
-
value = [vd, vt].compact.join('T')
|
1981
|
-
value += tz_part.to_s
|
1982
2419
|
end
|
1983
|
-
|
1984
|
-
lit = RDF::Literal(value, datatype: expanded_dt)
|
1985
|
-
when :duration, :dayTimeDuration, :yearMonthDuration
|
1986
|
-
# SPEC CONFUSION: surely format also includes that for other duration types?
|
1987
|
-
lit = RDF::Literal(value, datatype: expanded_dt)
|
1988
2420
|
when :anyType, :anySimpleType, :ENTITIES, :IDREFS, :NMTOKENS,
|
1989
2421
|
:ENTITY, :ID, :IDREF, :NOTATION
|
1990
2422
|
value_errors << "#{value} uses unsupported datatype: #{datatype.base}"
|
1991
2423
|
else
|
1992
2424
|
# For other types, format is a regexp
|
1993
|
-
|
2425
|
+
re = Regexp.new(format) rescue nil
|
2426
|
+
unless re.nil? || value.match(re)
|
1994
2427
|
value_errors << "#{value} does not match format #{format}"
|
1995
2428
|
end
|
1996
2429
|
lit = if value_errors.empty?
|
1997
2430
|
if expanded_dt == RDF::XSD.string
|
1998
2431
|
# Type string will still use language
|
1999
|
-
RDF::Literal(value, language: language)
|
2432
|
+
RDF::Literal(value, language: (language unless language == "und"))
|
2000
2433
|
else
|
2001
2434
|
RDF::Literal(value, datatype: expanded_dt)
|
2002
2435
|
end
|
2003
2436
|
end
|
2004
2437
|
end
|
2005
2438
|
|
2439
|
+
if datatype.length && value.to_s.length != datatype.length && ![:hexBinary, :base64Binary].include?(datatype.base.to_sym)
|
2440
|
+
value_errors << "#{value} does not have length #{datatype.length}"
|
2441
|
+
end
|
2442
|
+
if datatype.minLength && value.to_s.length < datatype.minLength && ![:hexBinary, :base64Binary].include?(datatype.base.to_sym)
|
2443
|
+
value_errors << "#{value} does not have length >= #{datatype.minLength}"
|
2444
|
+
end
|
2445
|
+
if datatype.maxLength && value.to_s.length > datatype.maxLength && ![:hexBinary, :base64Binary].include?(datatype.base.to_sym)
|
2446
|
+
value_errors << "#{value} does not have length <= #{datatype.maxLength}"
|
2447
|
+
end
|
2448
|
+
|
2449
|
+
# value constraints
|
2450
|
+
value_errors << "#{value} < minimum #{datatype.minimum}" if datatype.minimum && lit < datatype.minimum
|
2451
|
+
value_errors << "#{value} > maximum #{datatype.maximum}" if datatype.maximum && lit > datatype.maximum
|
2452
|
+
value_errors << "#{value} < minInclusive #{datatype.minInclusive}" if datatype.minInclusive && lit < datatype.minInclusive
|
2453
|
+
value_errors << "#{value} > maxInclusive #{datatype.maxInclusive}" if datatype.maxInclusive && lit > datatype.maxInclusive
|
2454
|
+
value_errors << "#{value} <= minExclusive #{datatype.minExclusive}" if datatype.minExclusive && lit <= datatype.minExclusive
|
2455
|
+
value_errors << "#{value} >= maxExclusive #{datatype.maxExclusive}" if datatype.maxExclusive && lit >= datatype.maxExclusive
|
2456
|
+
|
2006
2457
|
# Final value is a valid literal, or a plain literal otherwise
|
2007
2458
|
value_errors << "#{value} is not a valid #{datatype.base}" if lit && !lit.valid?
|
2008
2459
|
|
2009
|
-
#
|
2010
|
-
|
2460
|
+
# Either return matched literal value or errors
|
2011
2461
|
value_errors.empty? ? lit : value_errors
|
2012
2462
|
end
|
2013
2463
|
end
|
2014
|
-
|
2015
|
-
# Metadata errors detected
|
2016
|
-
class Error < StandardError; end
|
2017
2464
|
end
|