rdf-tabular 0.1.3.1 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +4 -3
- data/VERSION +1 -1
- data/etc/README +4 -0
- data/etc/csvw.jsonld +1 -1
- data/etc/earl.html +10544 -0
- data/etc/earl.jsonld +17068 -0
- data/etc/earl.ttl +7114 -0
- data/etc/template.haml +205 -0
- data/etc/well-known +4 -0
- data/lib/rdf/tabular.rb +10 -4
- data/lib/rdf/tabular/format.rb +11 -7
- data/lib/rdf/tabular/metadata.rb +761 -314
- data/lib/rdf/tabular/reader.rb +276 -144
- data/spec/format_spec.rb +11 -8
- data/spec/matchers.rb +4 -4
- data/spec/metadata_spec.rb +120 -36
- data/spec/reader_spec.rb +56 -18
- data/spec/spec_helper.rb +10 -2
- data/spec/suite_helper.rb +35 -18
- data/spec/suite_spec.rb +26 -24
- metadata +15 -3
data/etc/template.haml
ADDED
@@ -0,0 +1,205 @@
|
|
1
|
+
-# This template is used for generating a rollup EARL report. It expects to be
|
2
|
+
-# called with a single _tests_ local with the following structure
|
3
|
+
- require 'cgi'
|
4
|
+
- require 'digest'
|
5
|
+
|
6
|
+
!!! 5
|
7
|
+
%html{:prefix => "earl: http://www.w3.org/ns/earl# doap: http://usefulinc.com/ns/doap# mf: http://www.w3.org/2001/sw/DataAccess/tests/test-manifest#"}
|
8
|
+
- subjects = tests['testSubjects']
|
9
|
+
%head
|
10
|
+
%meta{"http-equiv" => "Content-Type", :content => "text/html;charset=utf-8"}
|
11
|
+
%meta{name: "viewport", content: "width=device-width, initial-scale=1.0"}
|
12
|
+
%link{rel: "stylesheet", type: "text/css", href: "https://www.w3.org/StyleSheets/TR/base"}
|
13
|
+
%title
|
14
|
+
= tests['name']
|
15
|
+
Implementation Report
|
16
|
+
:css
|
17
|
+
span[property='dc:description'] { display: none; }
|
18
|
+
td.PASS { color: green; }
|
19
|
+
td.FAIL { color: red; }
|
20
|
+
table.report {
|
21
|
+
border-width: 1px;
|
22
|
+
border-spacing: 2px;
|
23
|
+
border-style: outset;
|
24
|
+
border-color: gray;
|
25
|
+
border-collapse: separate;
|
26
|
+
background-color: white;
|
27
|
+
}
|
28
|
+
table.report th {
|
29
|
+
border-width: 1px;
|
30
|
+
padding: 1px;
|
31
|
+
border-style: inset;
|
32
|
+
border-color: gray;
|
33
|
+
background-color: white;
|
34
|
+
-moz-border-radius: ;
|
35
|
+
}
|
36
|
+
table.report td {
|
37
|
+
border-width: 1px;
|
38
|
+
padding: 1px;
|
39
|
+
border-style: inset;
|
40
|
+
border-color: gray;
|
41
|
+
background-color: white;
|
42
|
+
-moz-border-radius: ;
|
43
|
+
}
|
44
|
+
tr.summary {font-weight: bold;}
|
45
|
+
td.passed-all {color: green;}
|
46
|
+
td.passed-most {color: darkorange;}
|
47
|
+
td.passed-some {color: red;}
|
48
|
+
td.passed-none {color: gray;}
|
49
|
+
em.rfc2119 {
|
50
|
+
text-transform: lowercase;
|
51
|
+
font-variant: small-caps;
|
52
|
+
font-style: normal;
|
53
|
+
color: #900;
|
54
|
+
}
|
55
|
+
a.testlink {
|
56
|
+
color: inherit;
|
57
|
+
text-decoration: none;
|
58
|
+
}
|
59
|
+
a.testlink:hover {
|
60
|
+
text-decoration: underline;
|
61
|
+
}
|
62
|
+
%body
|
63
|
+
- subject_refs = {}
|
64
|
+
- tests['entries'].each {|m| m['title'] ||= m['description']}
|
65
|
+
%section{:about => tests['@id'], typeof: Array(tests['@type']).join(" ")}
|
66
|
+
%h2
|
67
|
+
Ruby rdf-tabular gem test results
|
68
|
+
%p
|
69
|
+
This document reports conformance for the following specifications:
|
70
|
+
%ul
|
71
|
+
%li
|
72
|
+
%a{property: "doap:name", href: "http://www.w3.org/TR/tabular-data-model/"}="MetaModel for Tabular Data and Metadata on the Web"
|
73
|
+
%li
|
74
|
+
%a{property: "doap:name", href: "http://www.w3.org/TR/tabular-metadata/"}="Metadata Vocabulary for Tabular Data"
|
75
|
+
%li
|
76
|
+
%a{property: "doap:name", href: "http://www.w3.org/TR/csv2rdf/"}="Generating RDF from Tabular Data on the Web"
|
77
|
+
%li
|
78
|
+
%a{property: "doap:name", href: "http://www.w3.org/TR/csv2json/"}="Generating JSON from Tabular Data on the Web"
|
79
|
+
%p
|
80
|
+
This report is also available in
|
81
|
+
%a{:href => "earl.ttl"}
|
82
|
+
Turtle
|
83
|
+
%dl
|
84
|
+
- subjects.each_with_index do |subject, index|
|
85
|
+
- subject_refs[subject['@id']] = "subj_#{index}"
|
86
|
+
%dt{:id => subject_refs[subject['@id']]}
|
87
|
+
%a{:href => subject['@id']}
|
88
|
+
%span{:about => subject['@id'], property: "doap:name"}<= subject['name']
|
89
|
+
%dd{property: "earl:testSubjects", resource: subject['@id'], typeof: Array(subject['@type']).join(" "), :inlist => true}
|
90
|
+
%dl
|
91
|
+
- if subject['doapDesc']
|
92
|
+
%dt= "Description"
|
93
|
+
%dd{property: "doap:description", :lang => 'en'}<
|
94
|
+
~ CGI.escapeHTML subject['doapDesc']
|
95
|
+
- if subject['language']
|
96
|
+
%dt= "Programming Language"
|
97
|
+
%dd{property: "doap:programming-language"}<
|
98
|
+
~ CGI.escapeHTML subject['language']
|
99
|
+
- if subject['homepage']
|
100
|
+
%dt= "Home Page"
|
101
|
+
%dd{property: "doap:homepage"}
|
102
|
+
%a{href: subject['homepage']}
|
103
|
+
~ CGI.escapeHTML subject['homepage']
|
104
|
+
- if subject['developer']
|
105
|
+
%dt= "Developer"
|
106
|
+
%dd{:rel => "doap:developer"}
|
107
|
+
- subject['developer'].each do |dev|
|
108
|
+
%div{resource: dev['@id'], typeof: Array(dev['@type']).join(" ")}
|
109
|
+
- if dev.has_key?('@id')
|
110
|
+
%a{:href => dev['@id']}
|
111
|
+
%span{property: "foaf:name"}<
|
112
|
+
~ CGI.escapeHTML dev['foaf:name']
|
113
|
+
- else
|
114
|
+
%span{property: "foaf:name"}<
|
115
|
+
~ CGI.escapeHTML dev['foaf:name']
|
116
|
+
- if dev['foaf:homepage']
|
117
|
+
%a{property: "foaf:homepage", href: dev['foaf:homepage']}
|
118
|
+
~ CGI.escapeHTML dev['foaf:homepage']
|
119
|
+
%dt
|
120
|
+
Test Suite Compliance
|
121
|
+
%dd
|
122
|
+
%table.report
|
123
|
+
%tbody
|
124
|
+
- tests['entries'].sort_by {|m| m['title'].to_s.downcase}.each do |manifest|
|
125
|
+
- passed = manifest['entries'].select {|t| t['assertions'][index]['result']['outcome'] == 'earl:passed' }.length
|
126
|
+
- total = manifest['entries'].length
|
127
|
+
- pct = (passed * 100.0) / total
|
128
|
+
- cls = (pct == 100.0 ? 'passed-all' : (pct >= 85.0) ? 'passed-most' : (pct == 0.0 ? 'passed-none' : 'passed-some'))
|
129
|
+
%tr
|
130
|
+
%td
|
131
|
+
%a{href: "##{manifest['title']}"}
|
132
|
+
~ manifest['title']
|
133
|
+
%td{:class => cls}
|
134
|
+
= pct == 0.0 ? "Untested" : "#{passed}/#{total} (#{'%.1f' % pct}%)"
|
135
|
+
%section
|
136
|
+
%h2
|
137
|
+
Individual Test Results
|
138
|
+
- tests['entries'].sort_by {|m| m['title'].to_s.downcase}.each do |manifest|
|
139
|
+
- test_cases = manifest['entries']
|
140
|
+
%section{id: manifest['title'], typeof: manifest['@type'].join(" "), resource: manifest['@id']}
|
141
|
+
%h2{property: "dc:title mf:name"}<=manifest['title']
|
142
|
+
- Array(manifest['description']).each do |desc|
|
143
|
+
%p{property: "rdfs:comment"}<
|
144
|
+
~ CGI.escapeHTML desc
|
145
|
+
%table.report
|
146
|
+
- skip_subject = {}
|
147
|
+
- passed_tests = []
|
148
|
+
%tr
|
149
|
+
%th
|
150
|
+
Test
|
151
|
+
- subjects.each_with_index do |subject, index|
|
152
|
+
- subject_refs[subject['@id']] = "subj_#{index}"
|
153
|
+
-# If subject is untested for every test in this manifest, skip it
|
154
|
+
- skip_subject[subject['@id']] = manifest['entries'].all? {|t| t['assertions'][index]['result']['outcome'] == 'earl:untested'}
|
155
|
+
- unless skip_subject[subject['@id']]
|
156
|
+
%th
|
157
|
+
%a{:href => '#' + subject_refs[subject['@id']]}<=subject['name']
|
158
|
+
- test_cases.each do |test|
|
159
|
+
%tr{:rel => "mf:entries", typeof: test['@type'].join(" "), resource: test['@id'], :inlist => true}
|
160
|
+
%td
|
161
|
+
= "Test #{test['@id'].split("#").last}: #{CGI.escapeHTML test['title']}"
|
162
|
+
- test['assertions'].each_with_index do |assertion, ndx|
|
163
|
+
- next if skip_subject[assertion['subject']]
|
164
|
+
- pass_fail = assertion['result']['outcome'].split(':').last.upcase.sub(/(PASS|FAIL)ED$/, '\1')
|
165
|
+
- passed_tests[ndx] = (passed_tests[ndx] || 0) + (pass_fail == 'PASS' ? 1 : 0)
|
166
|
+
%td{:class => pass_fail, property: "earl:assertions", typeof: assertion['@type'], :inlist => true}
|
167
|
+
- if assertion['assertedBy']
|
168
|
+
%link{property: "earl:assertedBy", :href => assertion['assertedBy']}
|
169
|
+
%link{property: "earl:test", :href => assertion['test']}
|
170
|
+
%link{property: "earl:subject", :href => assertion['subject']}
|
171
|
+
- if assertion['mode']
|
172
|
+
%link{property: 'earl:mode', :href => assertion['mode']}
|
173
|
+
%span{property: "earl:result", typeof: assertion['result']['@type']}
|
174
|
+
%span{property: 'earl:outcome', resource: assertion['result']['outcome']}
|
175
|
+
= pass_fail
|
176
|
+
%tr.summary
|
177
|
+
%td
|
178
|
+
= "Percentage passed out of #{manifest['entries'].length} Tests"
|
179
|
+
- passed_tests.compact.each do |r|
|
180
|
+
- pct = (r * 100.0) / manifest['entries'].length
|
181
|
+
%td{:class => (pct == 100.0 ? 'passed-all' : (pct >= 95.0 ? 'passed-most' : 'passed-some'))}
|
182
|
+
= "#{'%.1f' % pct}%"
|
183
|
+
%section#appendix{property: "earl:generatedBy", resource: tests['generatedBy']['@id'], typeof: tests['generatedBy']['@type']}
|
184
|
+
%h2
|
185
|
+
Report Generation Software
|
186
|
+
- doap = tests['generatedBy']
|
187
|
+
- rel = doap['release']
|
188
|
+
%p
|
189
|
+
This report generated by
|
190
|
+
%span{property: "doap:name"}<
|
191
|
+
%a{:href => tests['generatedBy']['@id']}<
|
192
|
+
= doap['name']
|
193
|
+
%meta{property: "doap:shortdesc", :content => doap['shortdesc'], :lang => 'en'}
|
194
|
+
%meta{property: "doap:description", :content => doap['doapDesc'], :lang => 'en'}
|
195
|
+
version
|
196
|
+
%span{property: "doap:release", resource: rel['@id'], typeof: 'doap:Version'}
|
197
|
+
%span{property: "doap:revision"}<=rel['revision']
|
198
|
+
%meta{property: "doap:name", :content => rel['name']}
|
199
|
+
%meta{property: "doap:created", :content => rel['created'], :datatype => "xsd:date"}
|
200
|
+
an
|
201
|
+
%a{property: "doap:license", :href => doap['license']}<="Unlicensed"
|
202
|
+
%span{property: "doap:programming-language"}<="Ruby"
|
203
|
+
application. More information is available at
|
204
|
+
%a{property: "doap:homepage", :href => doap['homepage']}<=doap['homepage']
|
205
|
+
= "."
|
data/etc/well-known
ADDED
data/lib/rdf/tabular.rb
CHANGED
@@ -1,9 +1,5 @@
|
|
1
1
|
$:.unshift(File.expand_path("..", __FILE__))
|
2
2
|
require 'rdf' # @see http://rubygems.org/gems/rdf
|
3
|
-
begin
|
4
|
-
require 'byebug' # REMOVE ME
|
5
|
-
rescue LoadError
|
6
|
-
end
|
7
3
|
require 'csv'
|
8
4
|
|
9
5
|
module RDF
|
@@ -28,6 +24,16 @@ module RDF
|
|
28
24
|
autoload :Transformation, 'rdf/tabular/metadata'
|
29
25
|
autoload :VERSION, 'rdf/tabular/version'
|
30
26
|
|
27
|
+
# Metadata errors detected
|
28
|
+
class Error < RDF::ReaderError; end
|
29
|
+
|
30
|
+
# Relative location of site-wide configuration file
|
31
|
+
SITE_WIDE_CONFIG = "/.well-known/csvm".freeze
|
32
|
+
SITE_WIDE_DEFAULT = %(
|
33
|
+
{+url}-metadata.json
|
34
|
+
csv-metadata.json
|
35
|
+
).gsub(/^\s+/, '').freeze
|
36
|
+
|
31
37
|
def self.debug?; @debug; end
|
32
38
|
def self.debug=(value); @debug = value; end
|
33
39
|
end
|
data/lib/rdf/tabular/format.rb
CHANGED
@@ -8,12 +8,13 @@ module RDF::Tabular
|
|
8
8
|
# RDF::Format.for(:tsv) #=> RDF::Tabular::Format
|
9
9
|
# RDF::Format.for("etc/foaf.csv")
|
10
10
|
# RDF::Format.for("etc/foaf.tsv")
|
11
|
-
# RDF::Format.for(:
|
12
|
-
# RDF::Format.for(:
|
13
|
-
# RDF::Format.for(:
|
14
|
-
# RDF::Format.for(:
|
15
|
-
# RDF::Format.for(:
|
16
|
-
# RDF::Format.for(:
|
11
|
+
# RDF::Format.for(file_name: "etc/foaf.csv")
|
12
|
+
# RDF::Format.for(file_name: "etc/foaf.tsv")
|
13
|
+
# RDF::Format.for(file_extension: "csv")
|
14
|
+
# RDF::Format.for(file_extension: "tsv")
|
15
|
+
# RDF::Format.for(content_type: "text/csv")
|
16
|
+
# RDF::Format.for(content_type: "text/tab-separated-values")
|
17
|
+
# RDF::Format.for(content_type: "application/csvm+json")
|
17
18
|
#
|
18
19
|
# @example Obtaining serialization format MIME types
|
19
20
|
# RDF::Format.content_types #=> {"text/csv" => [RDF::Tabular::Format]}
|
@@ -25,7 +26,10 @@ module RDF::Tabular
|
|
25
26
|
class Format < RDF::Format
|
26
27
|
content_type 'text/csv',
|
27
28
|
extensions: [:csv, :tsv],
|
28
|
-
alias:
|
29
|
+
alias: %w{
|
30
|
+
text/tab-separated-values
|
31
|
+
application/csvm+json
|
32
|
+
}
|
29
33
|
content_encoding 'utf-8'
|
30
34
|
|
31
35
|
reader { RDF::Tabular::Reader }
|
data/lib/rdf/tabular/metadata.rb
CHANGED
@@ -11,8 +11,7 @@ require 'yaml' # used by BCP47, which should have required it.
|
|
11
11
|
# CSVM Metadata processor
|
12
12
|
#
|
13
13
|
# * Extracts Metadata from file or Hash definition
|
14
|
-
# *
|
15
|
-
# * Extract Metadata from a CSV file
|
14
|
+
# * Extract Embedded Metadata from a CSV file
|
16
15
|
# * Return table-level annotations
|
17
16
|
# * Return Column-level annotations
|
18
17
|
# * Return row iterator with column information
|
@@ -45,30 +44,28 @@ module RDF::Tabular
|
|
45
44
|
valueUrl: :uri_template,
|
46
45
|
}.freeze
|
47
46
|
INHERITED_DEFAULTS = {
|
48
|
-
aboutUrl: "".freeze,
|
49
47
|
default: "".freeze,
|
50
48
|
lang: "und",
|
51
49
|
null: "".freeze,
|
52
50
|
ordered: false,
|
53
|
-
propertyUrl: "".freeze,
|
54
51
|
required: false,
|
55
52
|
textDirection: "ltr".freeze,
|
56
|
-
valueUrl: "".freeze,
|
57
53
|
}.freeze
|
58
54
|
|
59
55
|
# Valid datatypes
|
60
56
|
DATATYPES = {
|
61
|
-
anyAtomicType: RDF::XSD.
|
57
|
+
anyAtomicType: RDF::XSD.anyAtomicType,
|
62
58
|
anyURI: RDF::XSD.anyURI,
|
63
59
|
base64Binary: RDF::XSD.basee65Binary,
|
64
60
|
boolean: RDF::XSD.boolean,
|
65
61
|
byte: RDF::XSD.byte,
|
66
62
|
date: RDF::XSD.date,
|
67
63
|
dateTime: RDF::XSD.dateTime,
|
68
|
-
|
64
|
+
dayTimeDuration: RDF::XSD.dayTimeDuration,
|
69
65
|
dateTimeStamp: RDF::XSD.dateTimeStamp,
|
70
66
|
decimal: RDF::XSD.decimal,
|
71
67
|
double: RDF::XSD.double,
|
68
|
+
duration: RDF::XSD.duration,
|
72
69
|
float: RDF::XSD.float,
|
73
70
|
ENTITY: RDF::XSD.ENTITY,
|
74
71
|
gDay: RDF::XSD.gDay,
|
@@ -84,6 +81,7 @@ module RDF::Tabular
|
|
84
81
|
Name: RDF::XSD.Name,
|
85
82
|
NCName: RDF::XSD.NCName,
|
86
83
|
negativeInteger: RDF::XSD.negativeInteger,
|
84
|
+
NMTOKEN: RDF::XSD.NMTOKEN,
|
87
85
|
nonNegativeInteger: RDF::XSD.nonNegativeInteger,
|
88
86
|
nonPositiveInteger: RDF::XSD.nonPositiveInteger,
|
89
87
|
normalizedString: RDF::XSD.normalizedString,
|
@@ -100,7 +98,7 @@ module RDF::Tabular
|
|
100
98
|
unsignedShort: RDF::XSD.unsignedShort,
|
101
99
|
yearMonthDuration: RDF::XSD.yearMonthDuration,
|
102
100
|
|
103
|
-
any: RDF::XSD.
|
101
|
+
any: RDF::XSD.anyAtomicType,
|
104
102
|
binary: RDF::XSD.base64Binary,
|
105
103
|
datetime: RDF::XSD.dateTime,
|
106
104
|
html: RDF.HTML,
|
@@ -115,7 +113,7 @@ module RDF::Tabular
|
|
115
113
|
|
116
114
|
# Local version of the context
|
117
115
|
# @return [JSON::LD::Context]
|
118
|
-
LOCAL_CONTEXT = ::JSON::LD::Context.new.parse(File.expand_path("../../../../etc/csvw.jsonld", __FILE__))
|
116
|
+
LOCAL_CONTEXT = ::JSON::LD::Context.new.parse(File.expand_path("../../../../etc/csvw.jsonld", __FILE__)).freeze
|
119
117
|
|
120
118
|
# ID of this Metadata
|
121
119
|
# @return [RDF::URI]
|
@@ -139,7 +137,9 @@ module RDF::Tabular
|
|
139
137
|
#
|
140
138
|
# @param [String] path
|
141
139
|
# @param [Hash{Symbol => Object}] options
|
142
|
-
# see `RDF::Util::File.open_file` in RDF.rb
|
140
|
+
# see `RDF::Util::File.open_file` in RDF.rb and {#new}
|
141
|
+
# @yield [Metadata]
|
142
|
+
# @raise [IOError] if file not found
|
143
143
|
def self.open(path, options = {})
|
144
144
|
options = options.merge(
|
145
145
|
headers: {
|
@@ -152,8 +152,25 @@ module RDF::Tabular
|
|
152
152
|
end
|
153
153
|
end
|
154
154
|
|
155
|
+
# Return the well-known configuration for a file, and remember it using a weak-reference cache to avoid unnecessary retrievals.
|
156
|
+
# @param [String] base, the URL used for finding the file
|
157
|
+
# @return [Array<String>, false]
|
158
|
+
def self.site_wide_config(base)
|
159
|
+
require 'rdf/util/cache' unless defined?(::RDF::Util::Cache)
|
160
|
+
@cache ||= RDF::Util::Cache.new(-1)
|
161
|
+
|
162
|
+
config_loc = RDF::URI(base).join(SITE_WIDE_CONFIG).to_s
|
163
|
+
# Only load if we haven't tried before. Use `SITE_WIDE_DEFAULT` if not found
|
164
|
+
if @cache[config_loc].nil?
|
165
|
+
@cache[config_loc] = RDF::Util::File.open_file(config_loc) do |rd|
|
166
|
+
rd.each_line.to_a
|
167
|
+
end rescue SITE_WIDE_DEFAULT.split
|
168
|
+
end
|
169
|
+
@cache[config_loc]
|
170
|
+
end
|
171
|
+
|
155
172
|
##
|
156
|
-
# Return metadata for a file, based on user-specified and
|
173
|
+
# Return metadata for a file, based on user-specified, linked, and site-wide location configuration from an input file
|
157
174
|
# @param [IO, StringIO] input
|
158
175
|
# @param [Hash{Symbol => Object}] options
|
159
176
|
# @option options [Metadata, Hash, String, RDF::URI] :metadata user supplied metadata, merged on top of extracted metadata. If provided as a URL, Metadata is loaded from that location
|
@@ -175,22 +192,46 @@ module RDF::Tabular
|
|
175
192
|
# Search for metadata until found
|
176
193
|
|
177
194
|
# load link metadata, if available
|
178
|
-
|
179
|
-
if input.respond_to?(:links) &&
|
195
|
+
all_locs = []
|
196
|
+
if !metadata && input.respond_to?(:links) &&
|
180
197
|
link = input.links.find_link(%w(rel describedby))
|
181
|
-
|
198
|
+
link_loc = RDF::URI(base).join(link.href).to_s
|
199
|
+
md = Metadata.open(link_loc, options.merge(filenames: link_loc, reason: "load linked metadata: #{link_loc}"))
|
200
|
+
all_locs << link_loc if md
|
201
|
+
# Metadata must describe file to be useful
|
202
|
+
metadata = md if md && md.describes_file?(base)
|
182
203
|
end
|
183
204
|
|
184
|
-
|
185
|
-
|
205
|
+
locs = []
|
206
|
+
# If we still don't have metadata, load the site-wide configuration file and use templates found there as locations
|
207
|
+
if !metadata && base
|
208
|
+
templates = site_wide_config(base)
|
209
|
+
debug("for_input", options) {"templates: #{templates.map(&:to_s).inspect}"}
|
210
|
+
locs = templates.map do |template|
|
211
|
+
t = Addressable::Template.new(template)
|
212
|
+
RDF::URI(base).join(t.expand(url: base).to_s)
|
213
|
+
end
|
214
|
+
debug("for_input", options) {"locs: #{locs.map(&:to_s).inspect}"}
|
215
|
+
|
216
|
+
locs.each do |loc|
|
217
|
+
metadata ||= begin
|
218
|
+
md = Metadata.open(loc, options.merge(filenames: loc, reason: "load found metadata: #{loc}"))
|
219
|
+
# Metadata must describe file to be useful
|
220
|
+
all_locs << loc if md
|
221
|
+
md if md && md.describes_file?(base)
|
222
|
+
rescue IOError
|
223
|
+
debug("for_input", options) {"failed to load found metadata #{loc}: #{$!}"}
|
224
|
+
nil
|
225
|
+
end
|
226
|
+
end
|
186
227
|
end
|
187
228
|
|
188
|
-
|
189
|
-
|
190
|
-
|
191
|
-
|
192
|
-
|
193
|
-
|
229
|
+
# If Metadata was found, but no metadata describes the file, issue a warning
|
230
|
+
if !all_locs.empty? && !metadata
|
231
|
+
warnings = options.fetch(:warnings, [])
|
232
|
+
warnings << "Found metadata at #{all_locs.join(",")}, which does not describe #{base}, ignoring"
|
233
|
+
if options[:validate] && !options[:warnings]
|
234
|
+
$stderr.puts "Warnings: #{warnings.join("\n")}"
|
194
235
|
end
|
195
236
|
end
|
196
237
|
|
@@ -198,7 +239,7 @@ module RDF::Tabular
|
|
198
239
|
metadata = case
|
199
240
|
when metadata then metadata
|
200
241
|
when base then TableGroup.new({"@context" => "http://www.w3.org/ns/csvw", tables: [{url: base}]}, options)
|
201
|
-
else TableGroup.new({"@context" => "http://www.w3.org/ns/csvw", tables: []}, options)
|
242
|
+
else TableGroup.new({"@context" => "http://www.w3.org/ns/csvw", tables: [{url: nil}]}, options)
|
202
243
|
end
|
203
244
|
|
204
245
|
# Make TableGroup, if not already
|
@@ -217,6 +258,8 @@ module RDF::Tabular
|
|
217
258
|
else ::JSON.parse(input.to_s)
|
218
259
|
end
|
219
260
|
|
261
|
+
raise ::JSON::ParserError unless object.is_a?(Hash)
|
262
|
+
|
220
263
|
unless options[:parent]
|
221
264
|
# Add context, if not set (which it should be)
|
222
265
|
object['@context'] ||= options.delete(:@context) || options[:context]
|
@@ -237,7 +280,7 @@ module RDF::Tabular
|
|
237
280
|
when %w(tables).any? {|k| object_keys.include?(k)} then :TableGroup
|
238
281
|
when %w(dialect tableSchema transformations).any? {|k| object_keys.include?(k)} then :Table
|
239
282
|
when %w(targetFormat scriptFormat source).any? {|k| object_keys.include?(k)} then :Transformation
|
240
|
-
when %w(columns primaryKey foreignKeys).any? {|k| object_keys.include?(k)} then :Schema
|
283
|
+
when %w(columns primaryKey foreignKeys rowTitles).any? {|k| object_keys.include?(k)} then :Schema
|
241
284
|
when %w(name virtual).any? {|k| object_keys.include?(k)} then :Column
|
242
285
|
when %w(commentPrefix delimiter doubleQuote encoding header headerRowCount).any? {|k| object_keys.include?(k)} then :Dialect
|
243
286
|
when %w(lineTerminators quoteChar skipBlankRows skipColumns skipInitialSpace skipRows trim).any? {|k| object_keys.include?(k)} then :Dialect
|
@@ -251,13 +294,15 @@ module RDF::Tabular
|
|
251
294
|
when :Column then RDF::Tabular::Column
|
252
295
|
when :Dialect then RDF::Tabular::Dialect
|
253
296
|
else
|
254
|
-
raise Error, "
|
297
|
+
raise Error, "Unknown metadata type: #{type.inspect}"
|
255
298
|
end
|
256
299
|
end
|
257
300
|
|
258
301
|
md = klass.allocate
|
259
302
|
md.send(:initialize, object, options)
|
260
303
|
md
|
304
|
+
rescue ::JSON::ParserError
|
305
|
+
raise Error, "Expected input to be a JSON Object"
|
261
306
|
end
|
262
307
|
|
263
308
|
##
|
@@ -271,6 +316,8 @@ module RDF::Tabular
|
|
271
316
|
# Context used for this metadata. Taken from input if not provided
|
272
317
|
# @option options [RDF::URI] :base
|
273
318
|
# The Base URL to use when expanding the document. This overrides the value of `input` if it is a URL. If not specified and `input` is not an URL, the base URL defaults to the current document URL if in a browser context, or the empty string if there is no document context.
|
319
|
+
# @option options [Boolean] :normalize normalize the object
|
320
|
+
# @option options [Boolean] :validate Strict metadata validation
|
274
321
|
# @raise [Error]
|
275
322
|
# @return [Metadata]
|
276
323
|
def initialize(input, options = {})
|
@@ -285,15 +332,15 @@ module RDF::Tabular
|
|
285
332
|
@context = case input['@context']
|
286
333
|
when Array
|
287
334
|
warn "Context missing required value 'http://www.w3.org/ns/csvw'" unless input['@context'].include?('http://www.w3.org/ns/csvw')
|
288
|
-
LOCAL_CONTEXT.parse(input['@context'].detect {|e| e.is_a?(Hash)} || {})
|
335
|
+
LOCAL_CONTEXT.dup.parse(input['@context'].detect {|e| e.is_a?(Hash)} || {})
|
289
336
|
when Hash
|
290
337
|
warn "Context missing required value 'http://www.w3.org/ns/csvw'" unless input['@context'].include?('http://www.w3.org/ns/csvw')
|
291
|
-
LOCAL_CONTEXT.parse(input['@context'])
|
292
|
-
when "http://www.w3.org/ns/csvw" then LOCAL_CONTEXT
|
338
|
+
LOCAL_CONTEXT.dup.parse(input['@context'])
|
339
|
+
when "http://www.w3.org/ns/csvw" then LOCAL_CONTEXT.dup
|
293
340
|
else
|
294
341
|
if self.is_a?(TableGroup) || self.is_a?(Table) && !@parent
|
295
342
|
warn "Context missing required value 'http://www.w3.org/ns/csvw'"
|
296
|
-
LOCAL_CONTEXT
|
343
|
+
LOCAL_CONTEXT.dup
|
297
344
|
end
|
298
345
|
end
|
299
346
|
|
@@ -326,17 +373,17 @@ module RDF::Tabular
|
|
326
373
|
when :url
|
327
374
|
# URL of CSV relative to metadata
|
328
375
|
object[:url] = value
|
329
|
-
@url = base.join(value)
|
330
|
-
@
|
376
|
+
@url = @options[:base].join(value)
|
377
|
+
@options[:base] = @url if @context # Use as base for expanding IRIs
|
331
378
|
when :@id
|
332
379
|
# metadata identifier
|
333
380
|
object[:@id] = if value.is_a?(String)
|
334
381
|
value
|
335
382
|
else
|
336
383
|
warn "#{type} has invalid property '@id' (#{value.inspect}): expected a string"
|
337
|
-
""
|
384
|
+
"" # Default value
|
338
385
|
end
|
339
|
-
@id = base.join(object[:@id])
|
386
|
+
@id = @options[:base].join(object[:@id])
|
340
387
|
else
|
341
388
|
if @properties.has_key?(key) || INHERITED_PROPERTIES.has_key?(key)
|
342
389
|
self.send("#{key}=".to_sym, value)
|
@@ -348,7 +395,15 @@ module RDF::Tabular
|
|
348
395
|
end
|
349
396
|
|
350
397
|
# Set type from @type, if present and not otherwise defined
|
351
|
-
@type
|
398
|
+
@type = object[:@type].to_sym if object[:@type]
|
399
|
+
|
400
|
+
if options[:normalize]
|
401
|
+
# If normalizing, also remove remaining @context
|
402
|
+
self.normalize!
|
403
|
+
@context = nil
|
404
|
+
object.delete(:@context)
|
405
|
+
end
|
406
|
+
|
352
407
|
if reason
|
353
408
|
debug("md#initialize") {reason}
|
354
409
|
debug("md#initialize") {"filenames: #{filenames}"}
|
@@ -356,8 +411,14 @@ module RDF::Tabular
|
|
356
411
|
end
|
357
412
|
end
|
358
413
|
|
359
|
-
# Setters
|
414
|
+
# Getters and Setters
|
360
415
|
INHERITED_PROPERTIES.keys.each do |key|
|
416
|
+
define_method(key) do
|
417
|
+
object.fetch(key) do
|
418
|
+
parent ? parent.send(key) : default_value(key)
|
419
|
+
end
|
420
|
+
end
|
421
|
+
|
361
422
|
define_method("#{key}=".to_sym) do |value|
|
362
423
|
invalid = case key
|
363
424
|
when :aboutUrl, :default, :propertyUrl, :valueUrl
|
@@ -370,7 +431,7 @@ module RDF::Tabular
|
|
370
431
|
when :ordered, :required
|
371
432
|
"boolean" unless value.is_a?(TrueClass) || value.is_a?(FalseClass)
|
372
433
|
when :separator
|
373
|
-
"
|
434
|
+
"string or null" unless value.nil? || value.is_a?(String)
|
374
435
|
when :textDirection
|
375
436
|
"rtl or ltr" unless %(rtl ltr).include?(value)
|
376
437
|
when :datatype
|
@@ -379,7 +440,7 @@ module RDF::Tabular
|
|
379
440
|
|
380
441
|
if invalid
|
381
442
|
warn "#{type} has invalid property '#{key}' (#{value.inspect}): expected #{invalid}"
|
382
|
-
object
|
443
|
+
object.delete(key)
|
383
444
|
else
|
384
445
|
object[key] = value
|
385
446
|
end
|
@@ -399,18 +460,19 @@ module RDF::Tabular
|
|
399
460
|
# An object property that provides a schema description as described in section 3.8 Schemas, for all the tables in the group. This may be provided as an embedded object within the JSON metadata or as a URL reference to a separate JSON schema document
|
400
461
|
# when loading a remote schema, assign @id from it's location if not already set
|
401
462
|
def tableSchema=(value)
|
402
|
-
case value
|
463
|
+
object[:tableSchema] = case value
|
403
464
|
when String
|
404
|
-
link = base.join(value).to_s
|
405
|
-
|
406
|
-
|
407
|
-
|
465
|
+
link = context.base.join(value).to_s
|
466
|
+
md = Schema.open(link, @options.merge(parent: self, context: nil, normalize: true))
|
467
|
+
md[:@id] ||= link
|
468
|
+
md
|
408
469
|
when Hash
|
409
|
-
|
470
|
+
Schema.new(value, @options.merge(parent: self, context: nil))
|
410
471
|
when Schema
|
411
|
-
|
472
|
+
value
|
412
473
|
else
|
413
474
|
warn "#{type} has invalid property 'tableSchema' (#{value.inspect}): expected a URL or object"
|
475
|
+
Schema.new({}, @options.merge(parent: self, context: nil))
|
414
476
|
end
|
415
477
|
end
|
416
478
|
|
@@ -445,13 +507,16 @@ module RDF::Tabular
|
|
445
507
|
end
|
446
508
|
|
447
509
|
# If provided, dialect provides hints to processors about how to parse the referenced file to create a tabular data model.
|
448
|
-
@dialect = case value
|
510
|
+
@dialect = object[:dialect] = case value
|
449
511
|
when String
|
450
|
-
|
512
|
+
link = context.base.join(value).to_s
|
513
|
+
md = Metadata.open(link, @options.merge(parent: self, context: nil, normalize: true))
|
514
|
+
md[:@id] ||= link
|
515
|
+
md
|
451
516
|
when Hash
|
452
|
-
|
517
|
+
Dialect.new(value, @options.merge(parent: self, context: nil))
|
453
518
|
when Dialect
|
454
|
-
|
519
|
+
value
|
455
520
|
else
|
456
521
|
warn "#{type} has invalid property 'dialect' (#{value.inspect}): expected a URL or object"
|
457
522
|
nil
|
@@ -460,16 +525,18 @@ module RDF::Tabular
|
|
460
525
|
|
461
526
|
# Set new datatype
|
462
527
|
# @return [Dialect]
|
528
|
+
# @raise [Error] if datatype is not valid
|
463
529
|
def datatype=(value)
|
464
530
|
val = case value
|
465
531
|
when Hash then Datatype.new(value, parent: self)
|
466
532
|
else Datatype.new({base: value}, parent: self)
|
467
533
|
end
|
468
534
|
|
469
|
-
if val.valid?
|
535
|
+
if val.valid? || value.is_a?(Hash)
|
536
|
+
# Set it if it was specified as an object, which may cause validation errors later
|
470
537
|
object[:datatype] = val
|
471
538
|
else
|
472
|
-
warn "#{type} has invalid property 'datatype': expected a
|
539
|
+
warn "#{type} has invalid property 'datatype': expected a built-in or an object"
|
473
540
|
end
|
474
541
|
end
|
475
542
|
|
@@ -538,7 +605,7 @@ module RDF::Tabular
|
|
538
605
|
value = object[key]
|
539
606
|
case key
|
540
607
|
when :base
|
541
|
-
|
608
|
+
errors << "#{type} has invalid base: #{value.inspect}" unless DATATYPES.keys.map(&:to_s).include?(value)
|
542
609
|
when :columns
|
543
610
|
value.each do |v|
|
544
611
|
begin
|
@@ -549,14 +616,20 @@ module RDF::Tabular
|
|
549
616
|
end
|
550
617
|
column_names = value.map(&:name)
|
551
618
|
errors << "#{type} has invalid property '#{key}': must have unique names: #{column_names.inspect}" unless column_names.uniq == column_names
|
552
|
-
when :dialect, :tables, :tableSchema, :transformations
|
619
|
+
when :datatype, :dialect, :tables, :tableSchema, :transformations
|
553
620
|
Array(value).each do |t|
|
554
|
-
|
555
|
-
|
556
|
-
|
557
|
-
|
621
|
+
# Make sure value is of appropriate class
|
622
|
+
if t.is_a?({datatype: Datatype, dialect: Dialect, tables: Table, tableSchema: Schema, transformations: Transformation}[key])
|
623
|
+
begin
|
624
|
+
t.validate!
|
625
|
+
rescue Error => e
|
626
|
+
errors << e.message
|
627
|
+
end
|
628
|
+
else
|
629
|
+
errors << "#{type} has invalid property '#{key}': unexpected value #{value.class.name}"
|
558
630
|
end
|
559
631
|
end
|
632
|
+
errors << "#{type} has invalid property 'tables': must not be empty" if key == :tables && Array(value).empty?
|
560
633
|
when :foreignKeys
|
561
634
|
# An array of foreign key definitions that define how the values from specified columns within this table link to rows within this table or other tables. A foreign key definition is a JSON object with the properties:
|
562
635
|
value.each do |fk|
|
@@ -577,13 +650,13 @@ module RDF::Tabular
|
|
577
650
|
errors << "#{type} has invalid property '#{key}': reference has a schemaReference: #{reference.inspect}"
|
578
651
|
end
|
579
652
|
# resource is the URL of a Table in the TableGroup
|
580
|
-
ref = base.join(reference['resource']).to_s
|
653
|
+
ref = context.base.join(reference['resource']).to_s
|
581
654
|
table = root.is_a?(TableGroup) && root.tables.detect {|t| t.url == ref}
|
582
655
|
errors << "#{type} has invalid property '#{key}': table referenced by #{ref} not found" unless table
|
583
656
|
table.tableSchema if table
|
584
657
|
elsif reference.has_key?('schemaReference')
|
585
658
|
# resource is the @id of a Schema in the TableGroup
|
586
|
-
ref = base.join(reference['schemaReference']).to_s
|
659
|
+
ref = context.base.join(reference['schemaReference']).to_s
|
587
660
|
tables = root.is_a?(TableGroup) ? root.tables.select {|t| t.tableSchema[:@id] == ref} : []
|
588
661
|
case tables.length
|
589
662
|
when 0
|
@@ -608,6 +681,114 @@ module RDF::Tabular
|
|
608
681
|
errors << "#{type} has invalid property '#{key}': reference must be an object #{reference.inspect}"
|
609
682
|
end
|
610
683
|
end
|
684
|
+
when :format
|
685
|
+
case value
|
686
|
+
when Hash
|
687
|
+
# Object form only appropriate for numeric type
|
688
|
+
unless %w(
|
689
|
+
decimal integer long int short byte double float number
|
690
|
+
nonNegativeInteger positiveInteger nonPositiveInteger negativeInteger
|
691
|
+
unsignedLong unsignedInt unsignedShort unsignedByte
|
692
|
+
).include?(self.base)
|
693
|
+
warn "#{type} has invalid property '#{key}': Object form only allowed on string or binary datatypes"
|
694
|
+
object.delete(:format) # act as if not set
|
695
|
+
end
|
696
|
+
|
697
|
+
# Otherwise, if it exists, it's a UAX35 number pattern
|
698
|
+
begin
|
699
|
+
parse_uax35_number(value["pattern"], nil, value.fetch('groupChar', ','), value.fetch('decimalChar', '.'))
|
700
|
+
rescue ArgumentError => e
|
701
|
+
warn "#{type} has invalid property '#{key}' pattern: #{e.message}"
|
702
|
+
object[:format].delete("pattern") # act as if not set
|
703
|
+
end
|
704
|
+
else
|
705
|
+
case self.base
|
706
|
+
when 'boolean'
|
707
|
+
unless value.split("|").length == 2
|
708
|
+
warn "#{type} has invalid property '#{key}': annotation provides the true and false values expected, separated by '|'"
|
709
|
+
object.delete(:format) # act as if not set
|
710
|
+
end
|
711
|
+
when :decimal, :integer, :long, :int, :short, :byte,
|
712
|
+
:nonNegativeInteger, :positiveInteger,
|
713
|
+
:unsignedLong, :unsignedInt, :unsignedShort, :unsignedByte,
|
714
|
+
:nonPositiveInteger, :negativeInteger,
|
715
|
+
:double, :float, :number
|
716
|
+
begin
|
717
|
+
parse_uax35_number(value, nil)
|
718
|
+
rescue ArgumentError => e
|
719
|
+
warn "#{type} has invalid property '#{key}': #{e.message}"
|
720
|
+
object.delete(:format) # act as if not set
|
721
|
+
end
|
722
|
+
when 'date', 'dateTime', 'datetime', 'dateTimeStamp', 'time'
|
723
|
+
# Parse and validate format
|
724
|
+
begin
|
725
|
+
parse_uax35_date(value, nil)
|
726
|
+
rescue ArgumentError => e
|
727
|
+
warn "#{type} has invalid property '#{key}': #{e.message}"
|
728
|
+
object.delete(:format) # act as if not set
|
729
|
+
end
|
730
|
+
else
|
731
|
+
# Otherwise, if it exists, it's a regular expression
|
732
|
+
begin
|
733
|
+
Regexp.compile(value)
|
734
|
+
rescue
|
735
|
+
warn "#{type} has invalid property '#{key}': #{$!.message}"
|
736
|
+
object.delete(:format) # act as if not set
|
737
|
+
end
|
738
|
+
end
|
739
|
+
end
|
740
|
+
when :length, :minLength, :maxLength
|
741
|
+
# Applications must raise an error if both length and minLength are specified and length is less than minLength.
|
742
|
+
# Similarly, applications must raise an error if both length and maxLength are specified and length is greater than maxLength.
|
743
|
+
if object[:length]
|
744
|
+
case key
|
745
|
+
when :minLength
|
746
|
+
errors << "#{type} has invalid property minLength': both length and minLength requires length be greater than or equal to minLength" if object[:length] < value
|
747
|
+
when :maxLength
|
748
|
+
errors << "#{type} has invalid property maxLength': both length and maxLength requires length be less than or equal to maxLength" if object[:length] > value
|
749
|
+
end
|
750
|
+
end
|
751
|
+
|
752
|
+
# Applications must raise an error if minLength and maxLength are both specified and minLength is greater than maxLength.
|
753
|
+
if key == :maxLength && object[:minLength]
|
754
|
+
errors << "#{type} has invalid property #{key}': both minLength and maxLength requires minLength be less than or equal to maxLength" if object[:minLength] > value
|
755
|
+
end
|
756
|
+
|
757
|
+
# Applications must raise an error if length, maxLength, or minLength are specified and the base datatype is not string or one of its subtypes, or a binary type.
|
758
|
+
unless %w(string normalizedString token language Name NMTOKEN hexBinary base64Binary binary).include?(self.base)
|
759
|
+
errors << "#{type} has invalid property '#{key}': only allowed on string or binary datatypes"
|
760
|
+
end
|
761
|
+
when :minimum, :maximum, :minInclusive, :maxInclusive, :minExclusive, :maxExclusive
|
762
|
+
case self.base
|
763
|
+
when 'decimal', 'integer', 'long', 'int', 'short', 'byte', 'double', 'number', 'float',
|
764
|
+
'nonNegativeInteger', 'positiveInteger', 'unsignedLong', 'unsignedInt', 'unsignedShort', 'unsignedByte',
|
765
|
+
'nonPositiveInteger', 'negativeInteger', 'date', 'dateTime', 'datetime', 'dateTimeStamp', 'time',
|
766
|
+
'duration', 'dayTimeDuration', 'yearMonthDuration'
|
767
|
+
errors << "#{type} has invalid property '#{key}': #{value.to_ntriples} is not a valid #{self.base}" unless value.valid?
|
768
|
+
|
769
|
+
case key
|
770
|
+
when :minInclusive
|
771
|
+
# Applications MUST raise an error if both minInclusive and minExclusive are specified
|
772
|
+
errors << "#{type} cannot specify both minInclusive and minExclusive" if self.minExclusive
|
773
|
+
|
774
|
+
# Applications MUST raise an error if both minInclusive and maxInclusive are specified and maxInclusive is less than minInclusive
|
775
|
+
errors << "#{type} maxInclusive < minInclusive" if self.maxInclusive && self.maxInclusive < value
|
776
|
+
|
777
|
+
# Applications MUST raise an error if both minInclusive and maxExclusive are specified and maxExclusive is less than or equal to minInclusive
|
778
|
+
errors << "#{type} maxExclusive <= minInclusive" if self.maxExclusive && self.maxExclusive <= value
|
779
|
+
when :maxInclusive
|
780
|
+
# Applications MUST raise an error if both maxInclusive and maxExclusive are specified
|
781
|
+
errors << "#{type} cannot specify both maInclusive and maxExclusive" if self.maxExclusive
|
782
|
+
when :minExclusive
|
783
|
+
# Applications MUST raise an error if both minExclusive and maxExclusive are specified and maxExclusive is less than minExclusive
|
784
|
+
errors << "#{type} minExclusive < maxExclusive" if self.maxExclusive && self.maxExclusive < value
|
785
|
+
|
786
|
+
# Applications MUST raise an error if both minExclusive and maxInclusive are specified and maxInclusive is less than or equal to minExclusive
|
787
|
+
errors << "#{type} maxInclusive < minExclusive" if self.maxInclusive && self.maxInclusive <= value
|
788
|
+
end
|
789
|
+
else
|
790
|
+
errors << "#{type} has invalid property '#{key}': only allowed on numeric, date/time or duration datatypes"
|
791
|
+
end
|
611
792
|
when :notes
|
612
793
|
unless value.is_a?(Hash) || value.is_a?(Array)
|
613
794
|
errors << "#{type} has invalid property '#{key}': #{value}, Object or Array"
|
@@ -617,7 +798,7 @@ module RDF::Tabular
|
|
617
798
|
rescue Error => e
|
618
799
|
errors << "#{type} has invalid content '#{key}': #{e.message}"
|
619
800
|
end
|
620
|
-
when :primaryKey
|
801
|
+
when :primaryKey, :rowTitles
|
621
802
|
# A column reference property that holds either a single reference to a column description object or an array of references.
|
622
803
|
"#{type} has invalid property '#{key}': no column references found" unless Array(value).length > 0
|
623
804
|
Array(value).each do |k|
|
@@ -628,9 +809,18 @@ module RDF::Tabular
|
|
628
809
|
when :@id
|
629
810
|
# Must not be a BNode
|
630
811
|
if value.to_s.start_with?("_:")
|
631
|
-
errors << "#{type} has invalid property '#{key}': #{value.inspect}, must not start with '_:"
|
812
|
+
errors << "#{type} has invalid property '#{key}': #{value.inspect}, must not start with '_:'"
|
813
|
+
end
|
814
|
+
|
815
|
+
# Datatype @id MUST NOT be the URL of a built-in type
|
816
|
+
if self.is_a?(Datatype) && DATATYPES.values.include?(value)
|
817
|
+
errors << "#{type} has invalid property '#{key}': #{value.inspect}, must not be the URL of a built-in datatype"
|
632
818
|
end
|
633
819
|
when :@type
|
820
|
+
# Must not be a BNode
|
821
|
+
if value.to_s.start_with?("_:")
|
822
|
+
errors << "#{type} has invalid property '#{key}': #{value.inspect}, must not start with '_:'"
|
823
|
+
end
|
634
824
|
unless value.to_sym == type
|
635
825
|
errors << "#{type} has invalid property '#{key}': #{value.inspect}, expected #{type}"
|
636
826
|
end
|
@@ -690,7 +880,9 @@ module RDF::Tabular
|
|
690
880
|
next
|
691
881
|
end
|
692
882
|
number += 1
|
693
|
-
|
883
|
+
row = Row.new(data, self, number, number + skipped, @options)
|
884
|
+
(self.object[:rows] ||= []) << row if @options[:validate] # Keep track of rows when validating
|
885
|
+
yield(row)
|
694
886
|
end
|
695
887
|
end
|
696
888
|
|
@@ -775,18 +967,39 @@ module RDF::Tabular
|
|
775
967
|
object.keys.any? {|k| k.to_s.include?(':')}
|
776
968
|
end
|
777
969
|
|
970
|
+
# Does this metadata describe the file (URL)?
#
# A TableGroup describes the file when any one of its tables has a matching
# `url`; every other kind of metadata describes it when its own `url` matches.
#
# @param [RDF::URI] url
# @return [Boolean]
def describes_file?(url)
  # Anything that is not a TableGroup is compared on its own URL.
  return self.url == url unless self.is_a?(TableGroup)

  tables.any? {|table| table.url == url}
end
|
981
|
+
|
778
982
|
# Verify that the metadata we're using is compatible with embedded metadata
|
779
983
|
# @param [Table] other
|
780
984
|
# @raise [Error] if not compatible
|
781
985
|
def verify_compatible!(other)
|
782
986
|
if self.is_a?(TableGroup)
|
783
987
|
unless tables.any? {|t| t.url == other.url && t.verify_compatible!(other)}
|
784
|
-
|
988
|
+
if @options[:validate]
|
989
|
+
raise Error, "TableGroups must have Table with matching url #{tables.map(&:url).inspect} vs #{other.url.inspect}"
|
990
|
+
else
|
991
|
+
warn "TableGroups must have Table with matching url #{tables.map(&:url).inspect} vs #{other.url.inspect}"
|
992
|
+
end
|
785
993
|
end
|
786
994
|
else
|
787
995
|
# Tables must have the same url
|
788
|
-
|
789
|
-
|
996
|
+
unless url == other.url
|
997
|
+
if @options[:validate]
|
998
|
+
raise Error, "Tables must have the same url: #{url.inspect} vs #{other.url.inspect}}"
|
999
|
+
else
|
1000
|
+
warn "Tables must have the same url: #{url.inspect} vs #{other.url.inspect}}"
|
1001
|
+
end
|
1002
|
+
end
|
790
1003
|
|
791
1004
|
# Each column description within B MUST match the corresponding column description in A for non-virtual columns
|
792
1005
|
non_virtual_columns = Array(tableSchema.columns).reject(&:virtual)
|
@@ -798,23 +1011,41 @@ module RDF::Tabular
|
|
798
1011
|
index = 0
|
799
1012
|
object_columns.all? do |cb|
|
800
1013
|
ca = non_virtual_columns[index]
|
801
|
-
|
802
|
-
|
803
|
-
|
804
|
-
|
805
|
-
|
806
|
-
|
807
|
-
|
808
|
-
|
809
|
-
|
810
|
-
|
811
|
-
|
812
|
-
|
813
|
-
|
814
|
-
|
815
|
-
|
816
|
-
|
817
|
-
|
1014
|
+
ta = ca.titles || {}
|
1015
|
+
tb = cb.titles || {}
|
1016
|
+
if !ca.object.has_key?(:name) && !cb.object.has_key?(:name) && ta.empty? && tb.empty?
|
1017
|
+
true
|
1018
|
+
elsif ca.object.has_key?(:name) && cb.object.has_key?(:name)
|
1019
|
+
raise Error, "Columns don't match: ca: #{ca.inspect}, cb: #{cb.inspect}" unless ca.name == cb.name
|
1020
|
+
elsif @options[:validate] || !ta.empty? && !tb.empty?
|
1021
|
+
# If validating, column compatibility requires strict match between titles
|
1022
|
+
titles_match = case
|
1023
|
+
when Array(ta['und']).any? {|t| tb.values.flatten.compact.include?(t)}
|
1024
|
+
true
|
1025
|
+
when Array(tb['und']).any? {|t| ta.values.flatten.compact.include?(t)}
|
1026
|
+
true
|
1027
|
+
when ta.any? {|lang, values| !(Array(tb[lang]) & Array(values)).empty?}
|
1028
|
+
# Match on title and language
|
1029
|
+
true
|
1030
|
+
else
|
1031
|
+
# Match if a language from ta is a prefix of a language from tb with matching titles
|
1032
|
+
ta.any? do |la, values|
|
1033
|
+
tb.keys.any? do |lb|
|
1034
|
+
(la.start_with?(lb) || lb.start_with?(la)) && !(Array(tb[lb]) & Array(values)).empty?
|
1035
|
+
end
|
1036
|
+
end
|
1037
|
+
end
|
1038
|
+
|
1039
|
+
if titles_match
|
1040
|
+
true
|
1041
|
+
elsif !@options[:validate]
|
1042
|
+
# If not validating, columns don't match, but processing continues
|
1043
|
+
warn "Columns don't match: ca: #{ca.inspect}, cb: #{cb.inspect}"
|
1044
|
+
true
|
1045
|
+
else
|
1046
|
+
raise Error, "Columns don't match: ca: #{ca.inspect}, cb: #{cb.inspect}"
|
1047
|
+
end
|
1048
|
+
end
|
818
1049
|
index += 1
|
819
1050
|
end
|
820
1051
|
end
|
@@ -822,7 +1053,7 @@ module RDF::Tabular
|
|
822
1053
|
end
|
823
1054
|
|
824
1055
|
# Human-readable representation: the class name followed by the inspected
# annotated form (`to_atd`) when this object provides one, otherwise by the
# inspected underlying `object`.
#
# @return [String]
def inspect
  class_label = self.class.name
  payload = respond_to?(:to_atd) ? to_atd : object
  class_label + payload.inspect
end
|
827
1058
|
|
828
1059
|
# Proxy to @object
|
@@ -830,7 +1061,7 @@ module RDF::Tabular
|
|
830
1061
|
# Store `value` under `key` in the underlying `object`.
def []=(key, value)
  object[key] = value
end
|
831
1062
|
# Iterate over the underlying `object`, forwarding the given block to it.
def each(&block)
  object.each(&block)
end
|
832
1063
|
# Compare the underlying `object` with `other`.
#
# A Hash is compared directly; anything responding to `object` is compared
# through that value; any other argument is compared as-is.
#
# @param [Object] other
# @return [Boolean]
def ==(other)
  lhs = object
  rhs =
    if other.is_a?(Hash)
      other
    elsif other.respond_to?(:object)
      other.object
    else
      other
    end
  lhs == rhs
end
|
835
1066
|
# Serialize the underlying `object` as JSON, passing `args` straight through.
def to_json(args=nil)
  object.to_json(args)
end
|
836
1067
|
|
@@ -845,8 +1076,6 @@ module RDF::Tabular
|
|
845
1076
|
normalize_jsonld(key, value)
|
846
1077
|
when ->(k) {key.to_s == '@context'}
|
847
1078
|
"http://www.w3.org/ns/csvw"
|
848
|
-
when :link
|
849
|
-
base.join(value).to_s
|
850
1079
|
when :array
|
851
1080
|
value = [value] unless value.is_a?(Array)
|
852
1081
|
value.map do |v|
|
@@ -854,13 +1083,15 @@ module RDF::Tabular
|
|
854
1083
|
v.normalize!
|
855
1084
|
elsif v.is_a?(Hash) && (ref = v["reference"]).is_a?(Hash)
|
856
1085
|
# SPEC SUGGESTION: special case for foreignKeys
|
857
|
-
ref["resource"] = base.join(ref["resource"]).to_s if ref["resource"]
|
858
|
-
ref["schemaReference"] = base.join(ref["schemaReference"]).to_s if ref["schemaReference"]
|
1086
|
+
ref["resource"] = context.base.join(ref["resource"]).to_s if ref["resource"]
|
1087
|
+
ref["schemaReference"] = context.base.join(ref["schemaReference"]).to_s if ref["schemaReference"]
|
859
1088
|
v
|
860
1089
|
else
|
861
1090
|
v
|
862
1091
|
end
|
863
1092
|
end
|
1093
|
+
when :link
|
1094
|
+
context.base.join(value).to_s
|
864
1095
|
when :object
|
865
1096
|
case value
|
866
1097
|
when Metadata then value.normalize!
|
@@ -872,6 +1103,14 @@ module RDF::Tabular
|
|
872
1103
|
end
|
873
1104
|
when :natural_language
|
874
1105
|
value.is_a?(Hash) ? value : {(context.default_language || 'und') => Array(value)}
|
1106
|
+
when :atomic
|
1107
|
+
case key
|
1108
|
+
when :minimum, :maximum, :minInclusive, :maxInclusive, :minExclusive, :maxExclusive
|
1109
|
+
# Convert to a typed literal based on `base`. This will be validated later
|
1110
|
+
RDF::Literal(value, datatype: DATATYPES[self.base.to_sym])
|
1111
|
+
else
|
1112
|
+
value
|
1113
|
+
end
|
875
1114
|
else
|
876
1115
|
value
|
877
1116
|
end
|
@@ -901,10 +1140,10 @@ module RDF::Tabular
|
|
901
1140
|
raise Error, "Value object may not contain keys other than @value, @type, or @language: #{value.to_json}"
|
902
1141
|
elsif (value.keys.sort & %w(@language @type)) == %w(@language @type)
|
903
1142
|
raise Error, "Value object may not contain both @type and @language: #{value.to_json}"
|
904
|
-
elsif value['@language'] && !BCP47::Language.identify(value['@language'])
|
905
|
-
warn "Value object with @language must use valid language: #{value.to_json}"
|
1143
|
+
elsif value['@language'] && !BCP47::Language.identify(value['@language'].to_s)
|
1144
|
+
warn "Value object with @language must use valid language: #{value.to_json}"
|
906
1145
|
value.delete('@language')
|
907
|
-
elsif value['@type'] && !context.expand_iri(value['@type'], vocab: true).absolute?
|
1146
|
+
elsif value['@type'] && (value['@type'].start_with?('_:') || !context.expand_iri(value['@type'], vocab: true).absolute?)
|
908
1147
|
raise Error, "Value object with @type must defined type: #{value.to_json}"
|
909
1148
|
end
|
910
1149
|
value
|
@@ -919,7 +1158,7 @@ module RDF::Tabular
|
|
919
1158
|
Array(v).each do |vv|
|
920
1159
|
# Validate that all type values transform to absolute IRIs
|
921
1160
|
resource = context.expand_iri(vv, vocab: true)
|
922
|
-
raise Error, "Invalid type #{vv} in JSON-LD context" unless resource.
|
1161
|
+
raise Error, "Invalid type #{vv} in JSON-LD context" unless resource.is_a?(RDF::URI) && resource.absolute?
|
923
1162
|
end
|
924
1163
|
nv[k] = v
|
925
1164
|
when /^(@|_:)/
|
@@ -981,13 +1220,6 @@ module RDF::Tabular
|
|
981
1220
|
end
|
982
1221
|
end
|
983
1222
|
|
984
|
-
def inherited_property_value(method)
|
985
|
-
# Inherited properties
|
986
|
-
object.fetch(method.to_sym) do
|
987
|
-
parent.send(method) if parent
|
988
|
-
end
|
989
|
-
end
|
990
|
-
|
991
1223
|
# Look up the default for `prop`.
#
# The receiver class's DEFAULTS are merged with INHERITED_DEFAULTS, so an
# entry in INHERITED_DEFAULTS takes precedence when both define the key.
#
# @param [Symbol] prop
# @return [Object, nil] the default, or nil when neither table has the key
def default_value(prop)
  class_defaults = self.class.const_get(:DEFAULTS)
  class_defaults.merge(INHERITED_DEFAULTS)[prop]
end
|
@@ -1033,24 +1265,30 @@ module RDF::Tabular
|
|
1033
1265
|
transformations: :array,
|
1034
1266
|
}.freeze
|
1035
1267
|
DEFAULTS = {
|
1036
|
-
tableDirection: "
|
1268
|
+
tableDirection: "auto".freeze,
|
1037
1269
|
}.freeze
|
1038
1270
|
REQUIRED = [:tables].freeze
|
1039
1271
|
|
1040
|
-
# Setters
|
1272
|
+
# Getters and Setters
|
1041
1273
|
PROPERTIES.each do |key, type|
|
1042
|
-
next if [:
|
1274
|
+
next if [:dialect].include?(key)
|
1275
|
+
|
1276
|
+
define_method(key) do
|
1277
|
+
object.fetch(key, DEFAULTS[key])
|
1278
|
+
end
|
1279
|
+
|
1280
|
+
next if [:tables, :tableSchema, :transformations].include?(key)
|
1043
1281
|
define_method("#{key}=".to_sym) do |value|
|
1044
1282
|
invalid = case key
|
1045
1283
|
when :tableDirection
|
1046
|
-
"rtl, ltr, or
|
1284
|
+
"rtl, ltr, or auto" unless %(rtl ltr auto).include?(value)
|
1047
1285
|
when :notes, :tables, :tableSchema, :dialect, :transformations
|
1048
1286
|
# We handle this through separate setters
|
1049
1287
|
end
|
1050
1288
|
|
1051
1289
|
if invalid
|
1052
1290
|
warn "#{type} has invalid property '#{key}' (#{value.inspect}): expected #{invalid}"
|
1053
|
-
object
|
1291
|
+
object.delete(key)
|
1054
1292
|
else
|
1055
1293
|
object[key] = value
|
1056
1294
|
end
|
@@ -1063,15 +1301,6 @@ module RDF::Tabular
|
|
1063
1301
|
super || tables.any? {|t| t.has_annotations? }
|
1064
1302
|
end
|
1065
1303
|
|
1066
|
-
# Logic for accessing elements as accessors
|
1067
|
-
def method_missing(method, *args)
|
1068
|
-
if INHERITED_PROPERTIES.has_key?(method.to_sym)
|
1069
|
-
inherited_property_value(method.to_sym)
|
1070
|
-
else
|
1071
|
-
PROPERTIES.has_key?(method.to_sym) ? object[method.to_sym] : super
|
1072
|
-
end
|
1073
|
-
end
|
1074
|
-
|
1075
1304
|
##
|
1076
1305
|
# Iterate over all tables
|
1077
1306
|
# @yield [Table]
|
@@ -1102,7 +1331,7 @@ module RDF::Tabular
|
|
1102
1331
|
object.inject({
|
1103
1332
|
"@id" => (id.to_s if id),
|
1104
1333
|
"@type" => "AnnotatedTableGroup",
|
1105
|
-
"tables" =>
|
1334
|
+
"tables" => Array(self.tables).map(&:to_atd)
|
1106
1335
|
}) do |memo, (k, v)|
|
1107
1336
|
memo[k.to_s] ||= v
|
1108
1337
|
memo
|
@@ -1124,32 +1353,37 @@ module RDF::Tabular
|
|
1124
1353
|
}.freeze
|
1125
1354
|
DEFAULTS = {
|
1126
1355
|
suppressOutput: false,
|
1127
|
-
tableDirection: "
|
1356
|
+
tableDirection: "auto".freeze,
|
1128
1357
|
}.freeze
|
1129
1358
|
REQUIRED = [:url].freeze
|
1130
1359
|
|
1131
|
-
# Setters
|
1360
|
+
# Getters and Setters
|
1132
1361
|
PROPERTIES.each do |key, type|
|
1133
|
-
next if [:
|
1362
|
+
next if [:dialect, :url].include?(key)
|
1363
|
+
define_method(key) do
|
1364
|
+
object.fetch(key, DEFAULTS[key])
|
1365
|
+
end
|
1366
|
+
|
1367
|
+
next if [:tableSchema, :transformations].include?(key)
|
1134
1368
|
define_method("#{key}=".to_sym) do |value|
|
1135
1369
|
invalid = case key
|
1136
1370
|
when :suppressOutput
|
1137
1371
|
"boolean true or false" unless value.is_a?(TrueClass) || value.is_a?(FalseClass)
|
1138
1372
|
when :tableDirection
|
1139
|
-
"rtl, ltr, or
|
1373
|
+
"rtl, ltr, or auto" unless %(rtl ltr auto).include?(value)
|
1140
1374
|
when :url
|
1141
|
-
"valid URL" unless value.is_a?(String) && base.join(value).valid?
|
1375
|
+
"valid URL" unless value.is_a?(String) && context.base.join(value).valid?
|
1142
1376
|
when :notes, :tableSchema, :dialect, :transformations
|
1143
1377
|
# We handle this through separate setters
|
1144
1378
|
end
|
1145
1379
|
|
1146
1380
|
if invalid
|
1147
1381
|
warn "#{type} has invalid property '#{key}' (#{value.inspect}): expected #{invalid}"
|
1148
|
-
object
|
1382
|
+
object.delete(key)
|
1149
1383
|
elsif key == :url
|
1150
1384
|
# URL of CSV relative to metadata
|
1151
1385
|
object[:url] = value
|
1152
|
-
@url = base.join(value)
|
1386
|
+
@url = context.base.join(value)
|
1153
1387
|
@context.base = @url if @context # Use as base for expanding IRIs
|
1154
1388
|
else
|
1155
1389
|
object[key] = value
|
@@ -1180,22 +1414,13 @@ module RDF::Tabular
|
|
1180
1414
|
"@id" => (id.to_s if id),
|
1181
1415
|
"@type" => "AnnotatedTable",
|
1182
1416
|
"url" => self.url.to_s,
|
1183
|
-
"columns" => tableSchema.columns.map(&:to_atd),
|
1417
|
+
"columns" => Array(tableSchema ? tableSchema.columns : []).map(&:to_atd),
|
1184
1418
|
"rows" => []
|
1185
1419
|
}) do |memo, (k, v)|
|
1186
1420
|
memo[k.to_s] ||= v
|
1187
1421
|
memo
|
1188
1422
|
end.delete_if {|k,v| v.nil? || v.is_a?(Metadata) || k.to_s == "@context"}
|
1189
1423
|
end
|
1190
|
-
|
1191
|
-
# Logic for accessing elements as accessors
|
1192
|
-
def method_missing(method, *args)
|
1193
|
-
if INHERITED_PROPERTIES.has_key?(method.to_sym)
|
1194
|
-
inherited_property_value(method.to_sym)
|
1195
|
-
else
|
1196
|
-
PROPERTIES.has_key?(method.to_sym) ? object[method.to_sym] : super
|
1197
|
-
end
|
1198
|
-
end
|
1199
1424
|
end
|
1200
1425
|
|
1201
1426
|
class Schema < Metadata
|
@@ -1205,21 +1430,26 @@ module RDF::Tabular
|
|
1205
1430
|
columns: :array,
|
1206
1431
|
foreignKeys: :array,
|
1207
1432
|
primaryKey: :column_reference,
|
1433
|
+
rowTitles: :column_reference,
|
1208
1434
|
}.freeze
|
1209
1435
|
DEFAULTS = {}.freeze
|
1210
1436
|
REQUIRED = [].freeze
|
1211
1437
|
|
1212
|
-
# Setters
|
1438
|
+
# Getters and Setters
|
1213
1439
|
PROPERTIES.each do |key, type|
|
1440
|
+
define_method(key) do
|
1441
|
+
object.fetch(key, DEFAULTS[key])
|
1442
|
+
end
|
1443
|
+
|
1214
1444
|
define_method("#{key}=".to_sym) do |value|
|
1215
1445
|
invalid = case key
|
1216
|
-
when :primaryKey
|
1446
|
+
when :primaryKey, :rowTitles
|
1217
1447
|
"string or array of strings" unless !value.is_a?(Hash) && Array(value).all? {|v| v.is_a?(String)}
|
1218
1448
|
end
|
1219
1449
|
|
1220
1450
|
if invalid
|
1221
1451
|
warn "#{type} has invalid property '#{key}' (#{value.inspect}): expected #{invalid}"
|
1222
|
-
object
|
1452
|
+
object.delete(key)
|
1223
1453
|
else
|
1224
1454
|
object[key] = value
|
1225
1455
|
end
|
@@ -1270,12 +1500,21 @@ module RDF::Tabular
|
|
1270
1500
|
end
|
1271
1501
|
end
|
1272
1502
|
|
1273
|
-
|
1274
|
-
|
1275
|
-
|
1276
|
-
|
1277
|
-
|
1278
|
-
|
1503
|
+
##
# List of foreign keys referencing the specified table
#
# A foreign key is selected when its `reference` names the table through
# `resource` (resolved against `context.base` and compared with the table's
# `url`), or otherwise when its resolved `schemaReference` equals the `id`
# of the table's schema.
#
# @param [Table] table
# @return [Array<Hash>]
def foreign_keys_referencing(table)
  Array(foreignKeys).select do |fk|
    reference = fk['reference']
    if (resource = reference['resource'])
      target = context.base.join(resource).to_s
      table.url == target
    else
      # No `resource`: fall back to the schemaReference form
      target = context.base.join(reference['schemaReference']).to_s
      table.tableSchema.id == target
    end
  end
end
|
1281
1520
|
end
|
@@ -1321,8 +1560,12 @@ module RDF::Tabular
|
|
1321
1560
|
super || columns.any? {|c| c.has_annotations? }
|
1322
1561
|
end
|
1323
1562
|
|
1324
|
-
# Setters
|
1563
|
+
# Getters and Setters
|
1325
1564
|
PROPERTIES.each do |key, t|
|
1565
|
+
define_method(key) do
|
1566
|
+
object.fetch(key, DEFAULTS[key])
|
1567
|
+
end
|
1568
|
+
|
1326
1569
|
define_method("#{key}=".to_sym) do |value|
|
1327
1570
|
invalid = case key
|
1328
1571
|
when :name
|
@@ -1339,7 +1582,7 @@ module RDF::Tabular
|
|
1339
1582
|
object.delete(key) if object[key].nil?
|
1340
1583
|
elsif invalid
|
1341
1584
|
warn "#{type} has invalid property '#{key}' (#{value.inspect}): expected #{invalid}"
|
1342
|
-
object
|
1585
|
+
object.delete(key)
|
1343
1586
|
else
|
1344
1587
|
object[key] = value
|
1345
1588
|
end
|
@@ -1360,7 +1603,7 @@ module RDF::Tabular
|
|
1360
1603
|
# @return [RDF::URI]
|
1361
1604
|
def id
  # Use the owning table's URL when there is a table, else an empty URI
  base_url = table ? table.url : RDF::URI("")
  # The URL is stringified before the fragment is appended, so the result is a String
  "#{base_url}#col=#{self.sourceNumber}"
end
|
1365
1608
|
|
1366
1609
|
# Return Annotated Column representation
|
@@ -1380,15 +1623,6 @@ module RDF::Tabular
|
|
1380
1623
|
memo
|
1381
1624
|
end.delete_if {|k,v| v.nil?}
|
1382
1625
|
end
|
1383
|
-
|
1384
|
-
# Logic for accessing elements as accessors
|
1385
|
-
def method_missing(method, *args)
|
1386
|
-
if INHERITED_PROPERTIES.has_key?(method.to_sym)
|
1387
|
-
inherited_property_value(method.to_sym)
|
1388
|
-
else
|
1389
|
-
PROPERTIES.has_key?(method.to_sym) ? object[method.to_sym] : super
|
1390
|
-
end
|
1391
|
-
end
|
1392
1626
|
end
|
1393
1627
|
|
1394
1628
|
class Transformation < Metadata
|
@@ -1404,8 +1638,13 @@ module RDF::Tabular
|
|
1404
1638
|
DEFAULTS = {}.freeze
|
1405
1639
|
REQUIRED = %w(url targetFormat scriptFormat).map(&:to_sym).freeze
|
1406
1640
|
|
1407
|
-
# Setters
|
1641
|
+
# Getters and Setters
|
1408
1642
|
PROPERTIES.each do |key, type|
|
1643
|
+
next if [:url].include?(key)
|
1644
|
+
define_method(key) do
|
1645
|
+
object.fetch(key, DEFAULTS[key])
|
1646
|
+
end
|
1647
|
+
|
1409
1648
|
define_method("#{key}=".to_sym) do |value|
|
1410
1649
|
invalid = case key
|
1411
1650
|
when :scriptFormat, :targetFormat
|
@@ -1416,17 +1655,12 @@ module RDF::Tabular
|
|
1416
1655
|
|
1417
1656
|
if invalid
|
1418
1657
|
warn "#{type} has invalid property '#{key}' (#{value.inspect}): expected #{invalid}"
|
1419
|
-
object
|
1658
|
+
object.delete(key)
|
1420
1659
|
else
|
1421
1660
|
object[key] = value
|
1422
1661
|
end
|
1423
1662
|
end
|
1424
1663
|
end
|
1425
|
-
|
1426
|
-
# Logic for accessing elements as accessors
|
1427
|
-
def method_missing(method, *args)
|
1428
|
-
PROPERTIES.has_key?(method.to_sym) ? object[method.to_sym] : super
|
1429
|
-
end
|
1430
1664
|
end
|
1431
1665
|
|
1432
1666
|
class Dialect < Metadata
|
@@ -1444,7 +1678,7 @@ module RDF::Tabular
|
|
1444
1678
|
skipColumns: 0,
|
1445
1679
|
skipInitialSpace: false,
|
1446
1680
|
skipRows: 0,
|
1447
|
-
trim:
|
1681
|
+
trim: true
|
1448
1682
|
}.freeze
|
1449
1683
|
|
1450
1684
|
PROPERTIES = {
|
@@ -1467,13 +1701,15 @@ module RDF::Tabular
|
|
1467
1701
|
|
1468
1702
|
REQUIRED = [].freeze
|
1469
1703
|
|
1470
|
-
# Setters
|
1704
|
+
# Getters and Setters
|
1471
1705
|
PROPERTIES.keys.each do |key|
|
1706
|
+
define_method(key) do
|
1707
|
+
object.fetch(key, DEFAULTS[key])
|
1708
|
+
end
|
1709
|
+
|
1472
1710
|
define_method("#{key}=".to_sym) do |value|
|
1473
1711
|
invalid = case key
|
1474
|
-
when :commentPrefix, :delimiter, :quoteChar
|
1475
|
-
"a single character string" unless value.is_a?(String) && value.length == 1
|
1476
|
-
when :lineTerminators
|
1712
|
+
when :commentPrefix, :delimiter, :quoteChar, :lineTerminators
|
1477
1713
|
"a string" unless value.is_a?(String)
|
1478
1714
|
when :doubleQuote, :header, :skipInitialSpace, :skipBlankRows
|
1479
1715
|
"boolean true or false" unless value.is_a?(TrueClass) || value.is_a?(FalseClass)
|
@@ -1493,7 +1729,7 @@ module RDF::Tabular
|
|
1493
1729
|
object.delete(key) if object[key].nil?
|
1494
1730
|
elsif invalid
|
1495
1731
|
warn "#{type} has invalid property '#{key}' (#{value.inspect}): expected #{invalid}"
|
1496
|
-
object
|
1732
|
+
object.delete(key)
|
1497
1733
|
else
|
1498
1734
|
object[key] = value
|
1499
1735
|
end
|
@@ -1515,7 +1751,7 @@ module RDF::Tabular
|
|
1515
1751
|
# default for trim comes from skipInitialSpace
|
1516
1752
|
# @return [Boolean, String]
|
1517
1753
|
def trim
  # When :trim is not set explicitly, fall back to 'start' if
  # skipInitialSpace is truthy, otherwise to true.
  fallback = self.skipInitialSpace ? 'start' : true
  object.fetch(:trim, fallback)
end
1520
1756
|
|
1521
1757
|
##
|
@@ -1546,7 +1782,8 @@ module RDF::Tabular
|
|
1546
1782
|
}
|
1547
1783
|
}
|
1548
1784
|
metadata ||= table # In case the embedded metadata becomes the final metadata
|
1549
|
-
metadata["lang"] = options[:lang] if options[:lang]
|
1785
|
+
lang = metadata["lang"] = options[:lang] if options[:lang]
|
1786
|
+
lang ||= 'und'
|
1550
1787
|
|
1551
1788
|
# Set encoding on input
|
1552
1789
|
csv = ::CSV.new(input, csv_options)
|
@@ -1575,9 +1812,9 @@ module RDF::Tabular
|
|
1575
1812
|
# Initialize titles
|
1576
1813
|
columns = table["tableSchema"]["columns"] ||= []
|
1577
1814
|
column = columns[index - skipCols] ||= {
|
1578
|
-
"titles" => {
|
1815
|
+
"titles" => {lang => []},
|
1579
1816
|
}
|
1580
|
-
column["titles"][
|
1817
|
+
column["titles"][lang] << value
|
1581
1818
|
end
|
1582
1819
|
end
|
1583
1820
|
debug("embedded_metadata") {"table: #{table.inspect}"}
|
@@ -1585,20 +1822,12 @@ module RDF::Tabular
|
|
1585
1822
|
|
1586
1823
|
Table.new(table, options.merge(reason: "load embedded metadata: #{table['@id']}"))
|
1587
1824
|
end
|
1588
|
-
|
1589
|
-
# Logic for accessing elements as accessors
|
1590
|
-
def method_missing(method, *args)
|
1591
|
-
if DEFAULTS.has_key?(method.to_sym)
|
1592
|
-
# As set, or with default
|
1593
|
-
object.fetch(method.to_sym, DEFAULTS[method.to_sym])
|
1594
|
-
else
|
1595
|
-
super
|
1596
|
-
end
|
1597
|
-
end
|
1598
1825
|
end
|
1599
1826
|
|
1600
1827
|
class Datatype < Metadata
|
1601
1828
|
PROPERTIES = {
|
1829
|
+
:@id => :link,
|
1830
|
+
:@type => :atomic,
|
1602
1831
|
base: :atomic,
|
1603
1832
|
format: :atomic,
|
1604
1833
|
length: :atomic,
|
@@ -1612,50 +1841,248 @@ module RDF::Tabular
|
|
1612
1841
|
maxExclusive: :atomic,
|
1613
1842
|
}.freeze
|
1614
1843
|
REQUIRED = [].freeze
|
1615
|
-
DEFAULTS = {
|
1844
|
+
DEFAULTS = {
|
1845
|
+
base: "string"
|
1846
|
+
}.freeze
|
1616
1847
|
|
1617
1848
|
# Override `base` in Metadata
|
1618
1849
|
# Returns the value stored under :base in the underlying `object`.
def base
  object[:base]
end
|
1619
1850
|
|
1620
|
-
# Setters
|
1851
|
+
# Getters and Setters
|
1621
1852
|
# For every key in PROPERTIES, define a reader and a validating writer.
PROPERTIES.each do |key, _type|
  # Reader: the stored value, or the entry in DEFAULTS when the key is unset
  define_method(key) { object.fetch(key, DEFAULTS[key]) }

  # Writer: `expected` describes what a valid value looks like and is only
  # set when `value` is rejected; keys with no rule are always accepted.
  define_method(:"#{key}=") do |value|
    expected =
      case key
      when :base
        known = DATATYPES.keys.map(&:to_s)
        "built-in datatype" unless known.include?(value)
      when :minimum, :maximum, :minInclusive, :maxInclusive, :minExclusive, :maxExclusive
        acceptable = value.is_a?(Numeric) ||
          RDF::Literal::Date.new(value.to_s).valid? ||
          RDF::Literal::Time.new(value.to_s).valid? ||
          RDF::Literal::DateTime.new(value.to_s).valid?
        "numeric or valid date/time" unless acceptable
      when :format
        if value.is_a?(String)
          nil
        elsif value.is_a?(Hash)
          # Object form may only carry these three keys
          extra = value.keys.map(&:to_s) - %w(groupChar decimalChar pattern)
          "an object containing only groupChar, decimalChar, and/or pattern" unless extra.empty?
        else
          "a string or object"
        end
      when :length, :minLength, :maxLength
        "a non-negative integer" unless value.is_a?(Numeric) && value.integer? && value >= 0
      end

    if expected
      # Rejected: warn and remove the key from `object`
      warn "#{self.type} has invalid property '#{key}' (#{value.inspect}): expected #{expected}"
      object.delete(key)
    else
      object[key] = value
    end
  end
end
|
1655
1891
|
|
1656
|
-
|
1657
|
-
|
1658
|
-
|
1892
|
+
##
|
1893
|
+
# Parse the date format (if provided), and match against the value (if provided)
|
1894
|
+
# Otherwise, validate format and raise an error
|
1895
|
+
#
|
1896
|
+
# @param [String] format
|
1897
|
+
# @param [String] value
|
1898
|
+
# @return [String] XMLSchema version of value
|
1899
|
+
# @raise [ArgumentError] if format is not valid, or nil, if value does not match
|
1900
|
+
def parse_uax35_date(format, value)
|
1901
|
+
tz, date_format, time_format = nil, nil, nil
|
1902
|
+
return value unless format
|
1903
|
+
value ||= ""
|
1904
|
+
|
1905
|
+
# Extract tz info
|
1906
|
+
if md = format.match(/^(.*[dyms])+(\s*[xX]{1,5})$/)
|
1907
|
+
format, tz = md[1], md[2]
|
1908
|
+
end
|
1909
|
+
|
1910
|
+
date_format, time_format = format.split(' ')
|
1911
|
+
date_format, time_format = nil, date_format if self.base.to_sym == :time
|
1912
|
+
|
1913
|
+
# Extract date, if specified
|
1914
|
+
date_part = case date_format
|
1915
|
+
when 'yyyy-MM-dd' then value.match(/^(?<yr>\d{4})-(?<mo>\d{2})-(?<da>\d{2})/)
|
1916
|
+
when 'yyyyMMdd' then value.match(/^(?<yr>\d{4})(?<mo>\d{2})(?<da>\d{2})/)
|
1917
|
+
when 'dd-MM-yyyy' then value.match(/^(?<da>\d{2})-(?<mo>\d{2})-(?<yr>\d{4})/)
|
1918
|
+
when 'd-M-yyyy' then value.match(/^(?<da>\d{1,2})-(?<mo>\d{1,2})-(?<yr>\d{4})/)
|
1919
|
+
when 'MM-dd-yyyy' then value.match(/^(?<mo>\d{2})-(?<da>\d{2})-(?<yr>\d{4})/)
|
1920
|
+
when 'M-d-yyyy' then value.match(/^(?<mo>\d{1,2})-(?<da>\d{1,2})-(?<yr>\d{4})/)
|
1921
|
+
when 'dd/MM/yyyy' then value.match(/^(?<da>\d{2})\/(?<mo>\d{2})\/(?<yr>\d{4})/)
|
1922
|
+
when 'd/M/yyyy' then value.match(/^(?<da>\d{1,2})\/(?<mo>\d{1,2})\/(?<yr>\d{4})/)
|
1923
|
+
when 'MM/dd/yyyy' then value.match(/^(?<mo>\d{2})\/(?<da>\d{2})\/(?<yr>\d{4})/)
|
1924
|
+
when 'M/d/yyyy' then value.match(/^(?<mo>\d{1,2})\/(?<da>\d{1,2})\/(?<yr>\d{4})/)
|
1925
|
+
when 'dd.MM.yyyy' then value.match(/^(?<da>\d{2})\.(?<mo>\d{2})\.(?<yr>\d{4})/)
|
1926
|
+
when 'd.M.yyyy' then value.match(/^(?<da>\d{1,2})\.(?<mo>\d{1,2})\.(?<yr>\d{4})/)
|
1927
|
+
when 'MM.dd.yyyy' then value.match(/^(?<mo>\d{2})\.(?<da>\d{2})\.(?<yr>\d{4})/)
|
1928
|
+
when 'M.d.yyyy' then value.match(/^(?<mo>\d{1,2})\.(?<da>\d{1,2})\.(?<yr>\d{4})/)
|
1929
|
+
when 'yyyy-MM-ddTHH:mm' then value.match(/^(?<yr>\d{4})-(?<mo>\d{2})-(?<da>\d{2})T(?<hr>\d{2}):(?<mi>\d{2})(?<se>(?<ms>))/)
|
1930
|
+
when 'yyyy-MM-ddTHH:mm:ss' then value.match(/^(?<yr>\d{4})-(?<mo>\d{2})-(?<da>\d{2})T(?<hr>\d{2}):(?<mi>\d{2}):(?<se>\d{2})(?<ms>)/)
|
1931
|
+
when /yyyy-MM-ddTHH:mm:ss\.S+/
|
1932
|
+
md = value.match(/^(?<yr>\d{4})-(?<mo>\d{2})-(?<da>\d{2})T(?<hr>\d{2}):(?<mi>\d{2}):(?<se>\d{2})\.(?<ms>\d+)/)
|
1933
|
+
num_ms = date_format.match(/S+/).to_s.length
|
1934
|
+
md if md && md[:ms].length <= num_ms
|
1935
|
+
else
|
1936
|
+
raise ArgumentError, "unrecognized date/time format #{date_format}" if date_format
|
1937
|
+
nil
|
1938
|
+
end
|
1939
|
+
|
1940
|
+
# Forward past date part
|
1941
|
+
if date_part
|
1942
|
+
value = value[date_part.to_s.length..-1]
|
1943
|
+
value = value.lstrip if date_part && value.start_with?(' ')
|
1944
|
+
end
|
1945
|
+
|
1946
|
+
# Extract time, if specified
|
1947
|
+
time_part = case time_format
|
1948
|
+
when 'HH:mm:ss' then value.match(/^(?<hr>\d{2}):(?<mi>\d{2}):(?<se>\d{2})(?<ms>)/)
|
1949
|
+
when 'HHmmss' then value.match(/^(?<hr>\d{2})(?<mi>\d{2})(?<se>\d{2})(?<ms>)/)
|
1950
|
+
when 'HH:mm' then value.match(/^(?<hr>\d{2}):(?<mi>\d{2})(?<se>)(?<ms>)/)
|
1951
|
+
when 'HHmm' then value.match(/^(?<hr>\d{2})(?<mi>\d{2})(?<se>)(?<ms>)/)
|
1952
|
+
when /HH:mm:ss\.S+/
|
1953
|
+
md = value.match(/^(?<hr>\d{2}):(?<mi>\d{2}):(?<se>\d{2})\.(?<ms>\d+)/)
|
1954
|
+
num_ms = time_format.match(/S+/).to_s.length
|
1955
|
+
md if md && md[:ms].length <= num_ms
|
1956
|
+
else
|
1957
|
+
raise ArgumentError, "unrecognized date/time format #{time_format}" if time_format
|
1958
|
+
nil
|
1959
|
+
end
|
1960
|
+
|
1961
|
+
# If there's a date_format but no date_part, match fails
|
1962
|
+
return nil if date_format && date_part.nil?
|
1963
|
+
|
1964
|
+
# If there's a time_format but no time_part, match fails
|
1965
|
+
return nil if time_format && time_part.nil?
|
1966
|
+
|
1967
|
+
# Forward past time part
|
1968
|
+
value = value[time_part.to_s.length..-1] if time_part
|
1969
|
+
|
1970
|
+
# Use datetime match for time
|
1971
|
+
time_part = date_part if date_part && date_part.names.include?("hr")
|
1972
|
+
|
1973
|
+
# If there's a timezone, it may optionally start with whitespace
|
1974
|
+
value = value.lstrip if tz.to_s.start_with?(' ')
|
1975
|
+
tz_part = value if tz
|
1976
|
+
|
1977
|
+
# Compose normalized value
|
1978
|
+
vd = ("%04d-%02d-%02d" % [date_part[:yr].to_i, date_part[:mo].to_i, date_part[:da].to_i]) if date_part
|
1979
|
+
vt = ("%02d:%02d:%02d" % [time_part[:hr].to_i, time_part[:mi].to_i, time_part[:se].to_i]) if time_part
|
1980
|
+
|
1981
|
+
# Add milliseconds, if matched
|
1982
|
+
vt += ".#{time_part[:ms]}" if time_part && !time_part[:ms].empty?
|
1983
|
+
|
1984
|
+
value = [vd, vt].compact.join('T')
|
1985
|
+
value += tz_part.to_s
|
1986
|
+
end
|
1987
|
+
|
1988
|
+
##
|
1989
|
+
# Parse the number pattern (if provided), and match against the value (if provided)
|
1990
|
+
# Otherwise, validate format and raise an error
|
1991
|
+
#
|
1992
|
+
# @param [String] pattern
|
1993
|
+
# @param [String] value
|
1994
|
+
# @param [String] groupChar
|
1995
|
+
# @param [String] decimalChar
|
1996
|
+
# @return [String] XMLSchema version of value or nil, if value does not match
|
1997
|
+
# @raise [ArgumentError] if pattern is not valid
|
1998
|
+
def parse_uax35_number(pattern, value, groupChar=",", decimalChar=".")
|
1999
|
+
return value if pattern.to_s.empty?
|
2000
|
+
value ||= ""
|
2001
|
+
|
2002
|
+
re = build_number_re(pattern, groupChar, decimalChar)
|
2003
|
+
|
2004
|
+
# Upcase value and remove internal spaces
|
2005
|
+
value = value.upcase.gsub(/\s+/, '')
|
2006
|
+
|
2007
|
+
# Remove groupChar from value
|
2008
|
+
value = value.gsub(groupChar, '')
|
2009
|
+
|
2010
|
+
# Replace decimalChar with "."
|
2011
|
+
value = value.gsub(decimalChar, '.')
|
2012
|
+
|
2013
|
+
if value =~ re
|
2014
|
+
# result re-assembles parts removed from value
|
2015
|
+
value
|
2016
|
+
else
|
2017
|
+
# no match
|
2018
|
+
nil
|
2019
|
+
end
|
2020
|
+
end
|
2021
|
+
|
2022
|
+
# Build a regular expression from the provided pattern to match value, after suitable modifications
|
2023
|
+
#
|
2024
|
+
# @param [String] pattern
|
2025
|
+
# @param [String] groupChar
|
2026
|
+
# @param [String] decimalChar
|
2027
|
+
# @return [Regexp] Regular expression matching value
|
2028
|
+
# @raise [ArgumentError] if pattern is not valid
|
2029
|
+
def build_number_re(pattern, groupChar, decimalChar)
|
2030
|
+
# pattern must be composed of only 0, #, decimalChar, groupChar, E, +, -, %, and ‰
|
2031
|
+
legal_number_pattern = /\A
|
2032
|
+
([%‰])?
|
2033
|
+
([+-])?
|
2034
|
+
# Mantissa
|
2035
|
+
(\#|#{groupChar == '.' ? '\.' : groupChar})*
|
2036
|
+
(0|#{groupChar == '.' ? '\.' : groupChar})*
|
2037
|
+
# Fractional
|
2038
|
+
(?:#{decimalChar == '.' ? '\.' : decimalChar}
|
2039
|
+
(0|#{groupChar == '.' ? '\.' : groupChar})*
|
2040
|
+
(\#|#{groupChar == '.' ? '\.' : groupChar})*
|
2041
|
+
# Exponent
|
2042
|
+
(E
|
2043
|
+
[+-]?
|
2044
|
+
(?:\#|#{groupChar == '.' ? '\.' : groupChar})*
|
2045
|
+
(?:0|#{groupChar == '.' ? '\.' : groupChar})*
|
2046
|
+
)?
|
2047
|
+
)?
|
2048
|
+
([%‰])?
|
2049
|
+
\Z/x
|
2050
|
+
|
2051
|
+
unless pattern =~ legal_number_pattern
|
2052
|
+
raise ArgumentError, "unrecognized number pattern #{pattern}"
|
2053
|
+
end
|
2054
|
+
|
2055
|
+
# Remove groupChar from pattern
|
2056
|
+
pattern = pattern.gsub(groupChar, '')
|
2057
|
+
|
2058
|
+
# Replace decimalChar with "."
|
2059
|
+
pattern = pattern.gsub(decimalChar, '.')
|
2060
|
+
|
2061
|
+
# Split on decimalChar and E
|
2062
|
+
parts = pattern.split(/[\.E]/)
|
2063
|
+
|
2064
|
+
# Construct regular expression
|
2065
|
+
mantissa_str = case parts[0]
|
2066
|
+
when /\A([%‰])?([+-])?#+(0+)([%‰])?\Z/ then "#{$1}#{$2}\\d{#{$3.length},}#{$4}"
|
2067
|
+
when /\A([%‰])?([+-])?(0+)([%‰])?\Z/ then "#{$1}#{$2}\\d{#{$3.length}}#{$4}"
|
2068
|
+
when /\A([%‰])?([+-])?#+([%‰])?\Z/ then "#{$1}#{$2}\\d*#{$4}"
|
2069
|
+
end
|
2070
|
+
|
2071
|
+
fractional_str = case parts[1]
|
2072
|
+
when /\A(0+)(#+)([%‰])?\Z/ then "\\d{#{$1.length},#{$1.length+$2.length}}#{$3}"
|
2073
|
+
when /\A(0+)([%‰])?\Z/ then "\\d{#{$1.length}}#{$2}"
|
2074
|
+
when /\A(#+)([%‰])?\Z/ then "\\d{,#{$1.length}}#{$2}"
|
2075
|
+
end
|
2076
|
+
fractional_str = "\\.#{fractional_str}" if fractional_str
|
2077
|
+
|
2078
|
+
exponent_str = case parts[2]
|
2079
|
+
when /\A([+-])?(#+)(0+)([%‰])?\Z/ then "#{$1}\\d{#{$3.length},#{$2.length+$3.length}}#{$4}"
|
2080
|
+
when /\A([+-])?(0+)([%‰])?\Z/ then "#{$1}\\d{#{$2.length}}#{$3}"
|
2081
|
+
when /\A([+-])?(#+)([%‰])?\Z/ then "#{$1}\\d{,#{$2.length}}#{$3}"
|
2082
|
+
end
|
2083
|
+
exponent_str = "E#{exponent_str}" if exponent_str
|
2084
|
+
|
2085
|
+
Regexp.new("^#{mantissa_str}#{fractional_str}#{exponent_str}$")
|
1659
2086
|
end
|
1660
2087
|
end
|
1661
2088
|
|
@@ -1697,6 +2124,10 @@ module RDF::Tabular
|
|
1697
2124
|
"errors" => self.errors
|
1698
2125
|
}.delete_if {|k,v| Array(v).empty?}
|
1699
2126
|
end
|
2127
|
+
|
2128
|
+
def inspect
|
2129
|
+
self.class.name + to_atd.inspect
|
2130
|
+
end
|
1700
2131
|
end
|
1701
2132
|
|
1702
2133
|
# Row values, hashed by `name`
|
@@ -1715,6 +2146,16 @@ module RDF::Tabular
|
|
1715
2146
|
# @return [Table]
|
1716
2147
|
attr_reader :table
|
1717
2148
|
|
2149
|
+
#
|
2150
|
+
# Cells providing a unique row identifier
|
2151
|
+
# @return [Array<Cell>]
|
2152
|
+
attr_reader :primaryKey
|
2153
|
+
|
2154
|
+
#
|
2155
|
+
# Title(s) of this row
|
2156
|
+
# @return [Array<RDF::Literal>]
|
2157
|
+
attr_reader :titles
|
2158
|
+
|
1718
2159
|
#
|
1719
2160
|
# Context from Table with base set to table URL for expanding URI Templates
|
1720
2161
|
# @return [JSON::LD::Context]
|
@@ -1725,8 +2166,10 @@ module RDF::Tabular
|
|
1725
2166
|
# @param [Metadata] metadata for Table
|
1726
2167
|
# @param [Integer] number 1-based row number after skipped/header rows
|
1727
2168
|
# @param [Integer] source_number 1-based row number from source
|
2169
|
+
# @param [Hash{Symbol => Object}] options ({})
|
2170
|
+
# @option options [Boolean] :validate check for PK/FK consistency
|
1728
2171
|
# @return [Row]
|
1729
|
-
def initialize(row, metadata, number, source_number)
|
2172
|
+
def initialize(row, metadata, number, source_number, options = {})
|
1730
2173
|
@table = metadata
|
1731
2174
|
@number = number
|
1732
2175
|
@sourceNumber = source_number
|
@@ -1748,7 +2191,7 @@ module RDF::Tabular
|
|
1748
2191
|
end
|
1749
2192
|
|
1750
2193
|
# Make sure that the row length is at least as long as the number of column definitions, to implicitly include virtual columns
|
1751
|
-
columns.each_with_index {|c, index| row[index] ||=
|
2194
|
+
columns.each_with_index {|c, index| row[index] ||= c.null}
|
1752
2195
|
|
1753
2196
|
row.each_with_index do |value, index|
|
1754
2197
|
|
@@ -1764,7 +2207,7 @@ module RDF::Tabular
|
|
1764
2207
|
|
1765
2208
|
@values << cell = Cell.new(metadata, column, self, value)
|
1766
2209
|
|
1767
|
-
datatype = column.datatype || Datatype.new(base: "string", parent: column)
|
2210
|
+
datatype = column.datatype || Datatype.new({base: "string"}, parent: column)
|
1768
2211
|
value = value.gsub(/\r\t\a/, ' ') unless %w(string json xml html anyAtomicType any).include?(datatype.base)
|
1769
2212
|
value = value.strip.gsub(/\s+/, ' ') unless %w(string json xml html anyAtomicType any normalizedString).include?(datatype.base)
|
1770
2213
|
# if the resulting string is an empty string, apply the remaining steps to the string given by the default property
|
@@ -1787,23 +2230,35 @@ module RDF::Tabular
|
|
1787
2230
|
v.strip!
|
1788
2231
|
end
|
1789
2232
|
|
1790
|
-
expanded_dt = metadata.context.expand_iri(datatype.base, vocab: true)
|
2233
|
+
expanded_dt = datatype.id || metadata.context.expand_iri(datatype.base, vocab: true)
|
1791
2234
|
if (lit_or_errors = value_matching_datatype(v.dup, datatype, expanded_dt, column.lang)).is_a?(RDF::Literal)
|
1792
2235
|
lit_or_errors
|
1793
2236
|
else
|
1794
2237
|
cell_errors += lit_or_errors
|
1795
|
-
RDF::Literal(v, language: column.lang)
|
2238
|
+
RDF::Literal(v, language: (column.lang unless column.lang == "und"))
|
1796
2239
|
end
|
1797
2240
|
end
|
1798
2241
|
end.compact
|
1799
2242
|
|
2243
|
+
# Check for required values
|
2244
|
+
if column.required && (cell_values.any? {|v| v.to_s.empty?} || cell_values.empty?)
|
2245
|
+
cell_errors << "Required column has empty value(s): #{cell_values.map(&:to_s).inspect}"
|
2246
|
+
end
|
1800
2247
|
cell.value = (column.separator ? cell_values : cell_values.first)
|
1801
2248
|
cell.errors = cell_errors
|
1802
|
-
metadata.send(:debug, "#{self.number}: each_cell ##{self.sourceNumber},#{cell.column.sourceNumber}", cell.errors.join("\n")) unless cell_errors.empty?
|
1803
2249
|
|
1804
2250
|
map_values[columns[index - skipColumns].name] = (column.separator ? cell_values.map(&:to_s) : cell_values.first.to_s)
|
1805
2251
|
end
|
1806
2252
|
|
2253
|
+
# Record primaryKey if validating
|
2254
|
+
@primaryKey = @values.
|
2255
|
+
select {|cell| Array(table.tableSchema.primaryKey).include?(cell.column.name)} if options[:validate]
|
2256
|
+
|
2257
|
+
# Record any row titles
|
2258
|
+
@titles = @values.
|
2259
|
+
select {|cell| Array(table.tableSchema.rowTitles).include?(cell.column.name)}.
|
2260
|
+
map(&:value)
|
2261
|
+
|
1807
2262
|
# Map URLs for row
|
1808
2263
|
@values.each_with_index do |cell, index|
|
1809
2264
|
mapped_values = map_values.merge(
|
@@ -1824,30 +2279,24 @@ module RDF::Tabular
|
|
1824
2279
|
{
|
1825
2280
|
"@id" => id.to_s,
|
1826
2281
|
"@type" => "Row",
|
1827
|
-
"table" => (table.id
|
2282
|
+
"table" => (table.id || table.url),
|
1828
2283
|
"number" => self.number,
|
1829
2284
|
"sourceNumber" => self.sourceNumber,
|
1830
|
-
"cells" => @values.map(&:
|
2285
|
+
"cells" => @values.map(&:value)
|
1831
2286
|
}.delete_if {|k,v| v.nil?}
|
1832
2287
|
end
|
1833
2288
|
|
2289
|
+
def inspect
|
2290
|
+
self.class.name + to_atd.inspect
|
2291
|
+
end
|
2292
|
+
|
1834
2293
|
private
|
1835
2294
|
#
|
1836
2295
|
# given a datatype specification, return a literal matching that specification, if found, otherwise the list of error messages
|
1837
2296
|
# @return [RDF::Literal]
|
1838
2297
|
def value_matching_datatype(value, datatype, expanded_dt, language)
|
1839
|
-
value_errors = []
|
1840
|
-
|
1841
|
-
# Check constraints
|
1842
|
-
if datatype.length && value.length != datatype.length
|
1843
|
-
value_errors << "#{value} does not have length #{datatype.length}"
|
1844
|
-
end
|
1845
|
-
if datatype.minLength && value.length < datatype.minLength
|
1846
|
-
value_errors << "#{value} does not have length >= #{datatype.minLength}"
|
1847
|
-
end
|
1848
|
-
if datatype.maxLength && value.length > datatype.maxLength
|
1849
|
-
value_errors << "#{value} does not have length <= #{datatype.maxLength}"
|
1850
|
-
end
|
2298
|
+
lit, value_errors = nil, []
|
2299
|
+
original_value = value.dup
|
1851
2300
|
|
1852
2301
|
format = datatype.format
|
1853
2302
|
# Datatype specific constraints and conversions
|
@@ -1857,29 +2306,39 @@ module RDF::Tabular
|
|
1857
2306
|
:unsignedLong, :unsignedInt, :unsignedShort, :unsignedByte,
|
1858
2307
|
:nonPositiveInteger, :negativeInteger,
|
1859
2308
|
:double, :float, :number
|
2309
|
+
|
1860
2310
|
# Normalize representation based on numeric-specific facets
|
1861
|
-
|
1862
|
-
|
1863
|
-
|
1864
|
-
|
1865
|
-
|
2311
|
+
|
2312
|
+
format = case format
|
2313
|
+
when String then {"pattern" => format}
|
2314
|
+
when Hash then format
|
2315
|
+
else {}
|
1866
2316
|
end
|
1867
|
-
|
1868
|
-
|
1869
|
-
|
2317
|
+
|
2318
|
+
groupChar = format["groupChar"]
|
2319
|
+
decimalChar = format["decimalChar"] || '.'
|
2320
|
+
pattern = format["pattern"]
|
2321
|
+
|
2322
|
+
if !datatype.parse_uax35_number(pattern, value, groupChar || ",", decimalChar)
|
2323
|
+
value_errors << "#{value} does not match pattern #{pattern}"
|
1870
2324
|
end
|
1871
|
-
|
1872
|
-
|
2325
|
+
|
2326
|
+
# pattern facet failed
|
2327
|
+
value_errors << "#{value} has repeating #{groupChar.inspect}" if groupChar && value.include?(groupChar*2)
|
2328
|
+
value = value.gsub(groupChar, '') if groupChar
|
2329
|
+
value = value.sub(decimalChar, '.')
|
1873
2330
|
|
1874
2331
|
# Extract percent or per-mille sign
|
1875
2332
|
percent = permille = false
|
1876
|
-
|
1877
|
-
|
1878
|
-
|
1879
|
-
|
1880
|
-
|
1881
|
-
|
1882
|
-
|
2333
|
+
if groupChar
|
2334
|
+
case value
|
2335
|
+
when /%/
|
2336
|
+
value = value.sub('%', '')
|
2337
|
+
percent = true
|
2338
|
+
when /‰/
|
2339
|
+
value = value.sub('‰', '')
|
2340
|
+
permille = true
|
2341
|
+
end
|
1883
2342
|
end
|
1884
2343
|
|
1885
2344
|
lit = RDF::Literal(value, datatype: expanded_dt)
|
@@ -1889,129 +2348,117 @@ module RDF::Tabular
|
|
1889
2348
|
o = o / 1000 if permille
|
1890
2349
|
lit = RDF::Literal(o, datatype: expanded_dt)
|
1891
2350
|
end
|
2351
|
+
|
2352
|
+
if !lit.plain? && datatype.minimum && lit < datatype.minimum
|
2353
|
+
value_errors << "#{value} < minimum #{datatype.minimum}"
|
2354
|
+
end
|
2355
|
+
case
|
2356
|
+
when datatype.minimum && lit < datatype.minimum
|
2357
|
+
value_errors << "#{value} < minimum #{datatype.minimum}"
|
2358
|
+
when datatype.maximum && lit > datatype.maximum
|
2359
|
+
value_errors << "#{value} > maximum #{datatype.maximum}"
|
2360
|
+
when datatype.minInclusive && lit < datatype.minInclusive
|
2361
|
+
value_errors << "#{value} < minInclusive #{datatype.minInclusive}"
|
2362
|
+
when datatype.maxInclusive && lit > datatype.maxInclusive
|
2363
|
+
value_errors << "#{value} > maxInclusive #{datatype.maxInclusive}"
|
2364
|
+
when datatype.minExclusive && lit <= datatype.minExclusive
|
2365
|
+
value_errors << "#{value} <= minExclusive #{datatype.minExclusive}"
|
2366
|
+
when datatype.maxExclusive && lit >= datatype.maxExclusive
|
2367
|
+
value_errors << "#{value} ?= maxExclusive #{datatype.maxExclusive}"
|
2368
|
+
end
|
1892
2369
|
when :boolean
|
1893
|
-
|
2370
|
+
if format
|
1894
2371
|
# True/False determined by Y|N values
|
1895
2372
|
t, f = format.to_s.split('|', 2)
|
1896
2373
|
case
|
1897
2374
|
when value == t
|
1898
|
-
|
2375
|
+
lit = RDF::Literal::TRUE
|
1899
2376
|
when value == f
|
1900
|
-
|
2377
|
+
lit = RDF::Literal::FALSE
|
1901
2378
|
else
|
1902
2379
|
value_errors << "#{value} does not match boolean format #{format}"
|
1903
|
-
RDF::Literal::Boolean.new(value)
|
1904
2380
|
end
|
1905
2381
|
else
|
1906
2382
|
if %w(1 true).include?(value.downcase)
|
1907
|
-
RDF::Literal::TRUE
|
2383
|
+
lit = RDF::Literal::TRUE
|
1908
2384
|
elsif %w(0 false).include?(value.downcase)
|
1909
|
-
RDF::Literal::FALSE
|
2385
|
+
lit = RDF::Literal::FALSE
|
2386
|
+
else
|
2387
|
+
value_errors << "#{value} does not match boolean"
|
1910
2388
|
end
|
1911
2389
|
end
|
1912
2390
|
when :date, :time, :dateTime, :dateTimeStamp, :datetime
|
1913
|
-
|
1914
|
-
|
1915
|
-
|
1916
|
-
|
1917
|
-
if format && (md = format.match(/^(.*[dyms])+(\s*[xX]{1,5})$/))
|
1918
|
-
format, tz = md[1], md[2]
|
2391
|
+
if value = datatype.parse_uax35_date(format, value)
|
2392
|
+
lit = RDF::Literal(value, datatype: expanded_dt)
|
2393
|
+
else
|
2394
|
+
value_errors << "#{original_value} does not match format #{format}"
|
1919
2395
|
end
|
1920
|
-
|
1921
|
-
|
1922
|
-
|
1923
|
-
|
1924
|
-
|
1925
|
-
|
1926
|
-
|
1927
|
-
|
1928
|
-
|
1929
|
-
|
1930
|
-
|
1931
|
-
|
1932
|
-
|
1933
|
-
|
1934
|
-
|
1935
|
-
|
1936
|
-
when 'd/M/yyyy' then value.match(/^(?<da>\d{1,2})\/(?<mo>\d{1,2})\/(?<yr>\d{4})/)
|
1937
|
-
when 'MM/dd/yyyy' then value.match(/^(?<mo>\d{2})\/(?<da>\d{2})\/(?<yr>\d{4})/)
|
1938
|
-
when 'M/d/yyyy' then value.match(/^(?<mo>\d{1,2})\/(?<da>\d{1,2})\/(?<yr>\d{4})/)
|
1939
|
-
when 'dd.MM.yyyy' then value.match(/^(?<da>\d{2})\.(?<mo>\d{2})\.(?<yr>\d{4})/)
|
1940
|
-
when 'd.M.yyyy' then value.match(/^(?<da>\d{1,2})\.(?<mo>\d{1,2})\.(?<yr>\d{4})/)
|
1941
|
-
when 'MM.dd.yyyy' then value.match(/^(?<mo>\d{2})\.(?<da>\d{2})\.(?<yr>\d{4})/)
|
1942
|
-
when 'M.d.yyyy' then value.match(/^(?<mo>\d{1,2})\.(?<da>\d{1,2})\.(?<yr>\d{4})/)
|
1943
|
-
when 'yyyy-MM-ddTHH:mm:ss' then value.match(/^(?<yr>\d{4})-(?<mo>\d{2})-(?<da>\d{2})T(?<hr>\d{2}):(?<mi>\d{2}):(?<se>\d{2})/)
|
1944
|
-
when 'yyyy-MM-ddTHH:mm' then value.match(/^(?<yr>\d{4})-(?<mo>\d{2})-(?<da>\d{2})T(?<hr>\d{2}):(?<mi>\d{2})(?<se>)/)
|
1945
|
-
else
|
1946
|
-
value_errors << "unrecognized date/time format #{date_format}" if date_format
|
1947
|
-
nil
|
2396
|
+
when :duration, :dayTimeDuration, :yearMonthDuration
|
2397
|
+
# SPEC CONFUSION: surely format also includes that for other duration types?
|
2398
|
+
re = Regexp.new(format) rescue nil
|
2399
|
+
if re.nil? ||value.match(re)
|
2400
|
+
lit = RDF::Literal(value, datatype: expanded_dt)
|
2401
|
+
else
|
2402
|
+
value_errors << "#{value} does not match format #{format}"
|
2403
|
+
end
|
2404
|
+
when :hexBinary, :base64Binary
|
2405
|
+
lit = RDF::Literal.new(value, datatype: expanded_dt)
|
2406
|
+
unless lit.valid?
|
2407
|
+
value_errors << "#{value} is invalid"
|
2408
|
+
lit = RDF::Literal.new(value)
|
2409
|
+
else
|
2410
|
+
if datatype.length && lit.object.length != datatype.length
|
2411
|
+
value_errors << "decoded #{value} does not have length #{datatype.length}"
|
1948
2412
|
end
|
1949
|
-
|
1950
|
-
|
1951
|
-
if date_part
|
1952
|
-
value = value[date_part.to_s.length..-1]
|
1953
|
-
value = value.lstrip if date_part && value.start_with?(' ')
|
2413
|
+
if datatype.minLength && lit.object.length < datatype.minLength
|
2414
|
+
value_errors << "decoded #{value} does not have length >= #{datatype.length}"
|
1954
2415
|
end
|
1955
|
-
|
1956
|
-
|
1957
|
-
time_part = case time_format
|
1958
|
-
when 'HH:mm:ss' then value.match(/^(?<hr>\d{2}):(?<mi>\d{2}):(?<se>\d{2})/)
|
1959
|
-
when 'HHmmss' then value.match(/^(?<hr>\d{2})(?<mi>\d{2})(?<se>\d{2})/)
|
1960
|
-
when 'HH:mm' then value.match(/^(?<hr>\d{2}):(?<mi>\d{2})(?<se>)/)
|
1961
|
-
when 'HHmm' then value.match(/^(?<hr>\d{2})(?<mi>\d{2})(?<se>)/)
|
1962
|
-
else
|
1963
|
-
value_errors << "unrecognized date/time format #{time_format}" if time_format
|
1964
|
-
nil
|
2416
|
+
if datatype.maxLength && lit.object.length < datatype.maxLength
|
2417
|
+
value_errors << "decoded #{value} does not have length <= #{datatype.length}"
|
1965
2418
|
end
|
1966
|
-
|
1967
|
-
# Forward past time part
|
1968
|
-
value = value[time_part.to_s.length..-1] if time_part
|
1969
|
-
|
1970
|
-
# Use datetime match for time
|
1971
|
-
time_part = date_part if date_part && date_part.names.include?("hr")
|
1972
|
-
|
1973
|
-
# If there's a timezone, it may optionally start with whitespace
|
1974
|
-
value = value.lstrip if tz.to_s.start_with?(' ')
|
1975
|
-
tz_part = value if tz
|
1976
|
-
|
1977
|
-
# Compose normalized value
|
1978
|
-
vd = ("%04d-%02d-%02d" % [date_part[:yr].to_i, date_part[:mo].to_i, date_part[:da].to_i]) if date_part
|
1979
|
-
vt = ("%02d:%02d:%02d" % [time_part[:hr].to_i, time_part[:mi].to_i, time_part[:se].to_i]) if time_part
|
1980
|
-
value = [vd, vt].compact.join('T')
|
1981
|
-
value += tz_part.to_s
|
1982
2419
|
end
|
1983
|
-
|
1984
|
-
lit = RDF::Literal(value, datatype: expanded_dt)
|
1985
|
-
when :duration, :dayTimeDuration, :yearMonthDuration
|
1986
|
-
# SPEC CONFUSION: surely format also includes that for other duration types?
|
1987
|
-
lit = RDF::Literal(value, datatype: expanded_dt)
|
1988
2420
|
when :anyType, :anySimpleType, :ENTITIES, :IDREFS, :NMTOKENS,
|
1989
2421
|
:ENTITY, :ID, :IDREF, :NOTATION
|
1990
2422
|
value_errors << "#{value} uses unsupported datatype: #{datatype.base}"
|
1991
2423
|
else
|
1992
2424
|
# For other types, format is a regexp
|
1993
|
-
|
2425
|
+
re = Regexp.new(format) rescue nil
|
2426
|
+
unless re.nil? || value.match(re)
|
1994
2427
|
value_errors << "#{value} does not match format #{format}"
|
1995
2428
|
end
|
1996
2429
|
lit = if value_errors.empty?
|
1997
2430
|
if expanded_dt == RDF::XSD.string
|
1998
2431
|
# Type string will still use language
|
1999
|
-
RDF::Literal(value, language: language)
|
2432
|
+
RDF::Literal(value, language: (language unless language == "und"))
|
2000
2433
|
else
|
2001
2434
|
RDF::Literal(value, datatype: expanded_dt)
|
2002
2435
|
end
|
2003
2436
|
end
|
2004
2437
|
end
|
2005
2438
|
|
2439
|
+
if datatype.length && value.to_s.length != datatype.length && ![:hexBinary, :base64Binary].include?(datatype.base.to_sym)
|
2440
|
+
value_errors << "#{value} does not have length #{datatype.length}"
|
2441
|
+
end
|
2442
|
+
if datatype.minLength && value.to_s.length < datatype.minLength && ![:hexBinary, :base64Binary].include?(datatype.base.to_sym)
|
2443
|
+
value_errors << "#{value} does not have length >= #{datatype.minLength}"
|
2444
|
+
end
|
2445
|
+
if datatype.maxLength && value.to_s.length > datatype.maxLength && ![:hexBinary, :base64Binary].include?(datatype.base.to_sym)
|
2446
|
+
value_errors << "#{value} does not have length <= #{datatype.maxLength}"
|
2447
|
+
end
|
2448
|
+
|
2449
|
+
# value constraints
|
2450
|
+
value_errors << "#{value} < minimum #{datatype.minimum}" if datatype.minimum && lit < datatype.minimum
|
2451
|
+
value_errors << "#{value} > maximum #{datatype.maximum}" if datatype.maximum && lit > datatype.maximum
|
2452
|
+
value_errors << "#{value} < minInclusive #{datatype.minInclusive}" if datatype.minInclusive && lit < datatype.minInclusive
|
2453
|
+
value_errors << "#{value} > maxInclusive #{datatype.maxInclusive}" if datatype.maxInclusive && lit > datatype.maxInclusive
|
2454
|
+
value_errors << "#{value} <= minExclusive #{datatype.minExclusive}" if datatype.minExclusive && lit <= datatype.minExclusive
|
2455
|
+
value_errors << "#{value} >= maxExclusive #{datatype.maxExclusive}" if datatype.maxExclusive && lit >= datatype.maxExclusive
|
2456
|
+
|
2006
2457
|
# Final value is a valid literal, or a plain literal otherwise
|
2007
2458
|
value_errors << "#{value} is not a valid #{datatype.base}" if lit && !lit.valid?
|
2008
2459
|
|
2009
|
-
#
|
2010
|
-
|
2460
|
+
# Either return matched literal value or errors
|
2011
2461
|
value_errors.empty? ? lit : value_errors
|
2012
2462
|
end
|
2013
2463
|
end
|
2014
|
-
|
2015
|
-
# Metadata errors detected
|
2016
|
-
class Error < StandardError; end
|
2017
2464
|
end
|