rdf-tabular 0.1.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/AUTHORS +1 -0
- data/README.md +73 -0
- data/UNLICENSE +24 -0
- data/VERSION +1 -0
- data/etc/csvw.jsonld +1507 -0
- data/etc/doap.csv +5 -0
- data/etc/doap.csv-metadata.json +34 -0
- data/etc/doap.ttl +35 -0
- data/lib/rdf/tabular.rb +34 -0
- data/lib/rdf/tabular/csvw.rb +477 -0
- data/lib/rdf/tabular/format.rb +46 -0
- data/lib/rdf/tabular/json.rb +0 -0
- data/lib/rdf/tabular/literal.rb +38 -0
- data/lib/rdf/tabular/metadata.rb +2038 -0
- data/lib/rdf/tabular/reader.rb +591 -0
- data/lib/rdf/tabular/utils.rb +33 -0
- data/lib/rdf/tabular/version.rb +18 -0
- data/spec/format_spec.rb +30 -0
- data/spec/matchers.rb +134 -0
- data/spec/metadata_spec.rb +1716 -0
- data/spec/reader_spec.rb +221 -0
- data/spec/spec_helper.rb +47 -0
- data/spec/suite_helper.rb +161 -0
- data/spec/suite_spec.rb +76 -0
- metadata +269 -0
@@ -0,0 +1,46 @@
|
|
1
|
+
module RDF::Tabular
  ##
  # Tabular Data/CSV format specification.
  #
  # @example Obtaining a Tabular format class
  #   RDF::Format.for(:tabular)                                    #=> RDF::Tabular::Format
  #   RDF::Format.for(:csv)                                        #=> RDF::Tabular::Format
  #   RDF::Format.for(:tsv)                                        #=> RDF::Tabular::Format
  #   RDF::Format.for("etc/foaf.csv")
  #   RDF::Format.for("etc/foaf.tsv")
  #   RDF::Format.for(:file_name => "etc/foaf.csv")
  #   RDF::Format.for(:file_name => "etc/foaf.tsv")
  #   RDF::Format.for(:file_extension => "csv")
  #   RDF::Format.for(:file_extension => "tsv")
  #   RDF::Format.for(:content_type => "text/csv")
  #   RDF::Format.for(:content_type => "text/tab-separated-values")
  #
  # @example Obtaining serialization format MIME types
  #   RDF::Format.content_types #=> {"text/csv" => [RDF::Tabular::Format]}
  #
  # @example Obtaining serialization format file extension mappings
  #   RDF::Format.file_extensions #=> {:csv => "text/csv"}
  #
  # @see http://www.w3.org/TR/tabular-data-model/
  class Format < RDF::Format
    content_type 'text/csv',
                 extensions: [:csv, :tsv],
                 alias: 'text/tab-separated-values'
    content_encoding 'utf-8'

    reader { RDF::Tabular::Reader }

    ##
    # Sample detection to see if the input looks like CSV.
    #
    # Use a text sample to detect the format of an input file. Sub-classes implement
    # a matcher sufficient to detect probable format matches, including disambiguating
    # between other similar formats.
    #
    # NOTE(review): the regexp was reconstructed from a mangled extraction; it is
    # intended to match a line of comma-separated word tokens — confirm against the
    # canonical source.
    #
    # @param [String] sample Beginning several bytes (~ 1K) of input.
    # @return [Boolean]
    def self.detect(sample)
      !!sample.match(/^(?:(?:\w+)+,(?:\w+))$/)
    end
  end
end
|
File without changes
|
@@ -0,0 +1,38 @@
|
|
1
|
+
# CSVW-specific literal classes

require 'rdf'
require 'rdf/xsd'

module RDF::Tabular
  ##
  # A JSON literal.
  #
  # Wraps either a JSON string (stored as the lexical form) or an already-parsed
  # Ruby object, converting lazily between the two representations.
  class JSON < RDF::Literal
    DATATYPE = RDF::Tabular::CSVW.json
    GRAMMAR  = nil

    ##
    # @param [Object] value
    #   either a JSON string or a parsed object
    # @option options [String] :lexical (nil)
    #   explicit lexical form, overriding a String `value`
    def initialize(value, options = {})
      @datatype = options[:datatype] || DATATYPE
      @string = options[:lexical] if options.has_key?(:lexical)
      if value.is_a?(String)
        @string ||= value
      else
        @object = value
      end
    end

    ##
    # Parse value, if necessary.
    #
    # @return [Object] the parsed JSON object
    def object
      @object ||= ::JSON.parse(value)
    end

    # Lexical (serialized JSON) form of the literal.
    def to_s
      @string ||= value.to_json
    end
  end
end
|
@@ -0,0 +1,2038 @@
|
|
1
|
+
require 'json'
|
2
|
+
require 'json/ld'
|
3
|
+
require 'bcp47'
|
4
|
+
require 'addressable/template'
|
5
|
+
require 'rdf/xsd'
|
6
|
+
|
7
|
+
##
|
8
|
+
# CSVM Metadata processor
|
9
|
+
#
|
10
|
+
# * Extracts Metadata from file or Hash definition
|
11
|
+
# * Merges multiple Metadata definitions
|
12
|
+
# * Extract Metadata from a CSV file
|
13
|
+
# * Return table-level annotations
|
14
|
+
# * Return Column-level annotations
|
15
|
+
# * Return row iterator with column information
|
16
|
+
#
|
17
|
+
# @author [Gregg Kellogg](http://greggkellogg.net/)
|
18
|
+
module RDF::Tabular
|
19
|
+
class Metadata
|
20
|
+
include Utils
|
21
|
+
|
22
|
+
# Hash representation
|
23
|
+
attr_accessor :object
|
24
|
+
|
25
|
+
# Inheritect properties, valid for all types
|
26
|
+
INHERITED_PROPERTIES = {
|
27
|
+
null: :atomic,
|
28
|
+
lang: :atomic,
|
29
|
+
textDirection: :atomic,
|
30
|
+
separator: :atomic,
|
31
|
+
default: :atomic,
|
32
|
+
ordered: :atomic,
|
33
|
+
datatype: :atomic,
|
34
|
+
aboutUrl: :uri_template,
|
35
|
+
propertyUrl: :uri_template,
|
36
|
+
valueUrl: :uri_template,
|
37
|
+
}.freeze
|
38
|
+
|
39
|
+
# Valid datatypes
|
40
|
+
DATATYPES = {
|
41
|
+
anyAtomicType: RDF::XSD.anySimpleType,
|
42
|
+
anyURI: RDF::XSD.anyURI,
|
43
|
+
base64Binary: RDF::XSD.basee65Binary,
|
44
|
+
boolean: RDF::XSD.boolean,
|
45
|
+
byte: RDF::XSD.byte,
|
46
|
+
date: RDF::XSD.date,
|
47
|
+
dateTime: RDF::XSD.dateTime,
|
48
|
+
dateTimeDuration: RDF::XSD.dateTimeDuration,
|
49
|
+
dateTimeStamp: RDF::XSD.dateTimeStamp,
|
50
|
+
decimal: RDF::XSD.decimal,
|
51
|
+
double: RDF::XSD.double,
|
52
|
+
float: RDF::XSD.float,
|
53
|
+
ENTITY: RDF::XSD.ENTITY,
|
54
|
+
gDay: RDF::XSD.gDay,
|
55
|
+
gMonth: RDF::XSD.gMonth,
|
56
|
+
gMonthDay: RDF::XSD.gMonthDay,
|
57
|
+
gYear: RDF::XSD.gYear,
|
58
|
+
gYearMonth: RDF::XSD.gYearMonth,
|
59
|
+
hexBinary: RDF::XSD.hexBinary,
|
60
|
+
int: RDF::XSD.int,
|
61
|
+
integer: RDF::XSD.integer,
|
62
|
+
language: RDF::XSD.language,
|
63
|
+
long: RDF::XSD.long,
|
64
|
+
Name: RDF::XSD.Name,
|
65
|
+
NCName: RDF::XSD.NCName,
|
66
|
+
negativeInteger: RDF::XSD.negativeInteger,
|
67
|
+
nonNegativeInteger: RDF::XSD.nonNegativeInteger,
|
68
|
+
nonPositiveInteger: RDF::XSD.nonPositiveInteger,
|
69
|
+
normalizedString: RDF::XSD.normalizedString,
|
70
|
+
NOTATION: RDF::XSD.NOTATION,
|
71
|
+
positiveInteger: RDF::XSD.positiveInteger,
|
72
|
+
QName: RDF::XSD.Qname,
|
73
|
+
short: RDF::XSD.short,
|
74
|
+
string: RDF::XSD.string,
|
75
|
+
time: RDF::XSD.time,
|
76
|
+
token: RDF::XSD.token,
|
77
|
+
unsignedByte: RDF::XSD.unsignedByte,
|
78
|
+
unsignedInt: RDF::XSD.unsignedInt,
|
79
|
+
unsignedLong: RDF::XSD.unsignedLong,
|
80
|
+
unsignedShort: RDF::XSD.unsignedShort,
|
81
|
+
yearMonthDuration: RDF::XSD.yearMonthDuration,
|
82
|
+
|
83
|
+
any: RDF::XSD.anySimpleType,
|
84
|
+
binary: RDF::XSD.base64Binary,
|
85
|
+
datetime: RDF::XSD.dateTime,
|
86
|
+
html: RDF.HTML,
|
87
|
+
json: RDF::Tabular::CSVW.JSON,
|
88
|
+
number: RDF::XSD.double,
|
89
|
+
xml: RDF.XMLLiteral,
|
90
|
+
}
|
91
|
+
|
92
|
+
# A name is restricted according to the following RegExp.
|
93
|
+
# @return [RegExp]
|
94
|
+
NAME_SYNTAX = %r(\A(?:_col|[a-zA-Z0-9]|%\h\h)([a-zA-Z0-9\._]|%\h\h)*\z)
|
95
|
+
|
96
|
+
# Local version of the context
|
97
|
+
# @return [JSON::LD::Context]
|
98
|
+
LOCAL_CONTEXT = ::JSON::LD::Context.new.parse(File.expand_path("../../../../etc/csvw.jsonld", __FILE__))
|
99
|
+
|
100
|
+
# ID of this Metadata
|
101
|
+
# @return [RDF::URI]
|
102
|
+
attr_reader :id
|
103
|
+
|
104
|
+
# URL of related resource
|
105
|
+
# @return [RDF::URI]
|
106
|
+
attr_reader :url
|
107
|
+
|
108
|
+
# Parent of this Metadata (TableGroup for Table, ...)
|
109
|
+
# @return [Metadata]
|
110
|
+
attr_reader :parent
|
111
|
+
|
112
|
+
# Filename(s) (URI) of opened metadata, if any
|
113
|
+
# May be plural when merged
|
114
|
+
# @return [Array<RDF::URI>] filenames
|
115
|
+
attr_reader :filenames
|
116
|
+
|
117
|
+
##
|
118
|
+
# Attempt to retrieve the file at the specified path. If it is valid metadata, create a new Metadata object from it, otherwise, an empty Metadata object
|
119
|
+
#
|
120
|
+
# @param [String] path
|
121
|
+
# @param [Hash{Symbol => Object}] options
|
122
|
+
# see `RDF::Util::File.open_file` in RDF.rb
|
123
|
+
def self.open(path, options = {})
|
124
|
+
options = options.merge(
|
125
|
+
headers: {
|
126
|
+
'Accept' => 'application/ld+json, application/json'
|
127
|
+
}
|
128
|
+
)
|
129
|
+
path = "file:" + path unless path =~ /^\w+:/
|
130
|
+
RDF::Util::File.open_file(path, options) do |file|
|
131
|
+
self.new(file, options.merge(base: path, filenames: path))
|
132
|
+
end
|
133
|
+
end
|
134
|
+
|
135
|
+
##
|
136
|
+
# Return metadata for a file, based on user-specified and path-relative locations from an input file
|
137
|
+
# @param [IO, StringIO] input
|
138
|
+
# @param [Hash{Symbol => Object}] options
|
139
|
+
# @option options [Metadata, Hash, String, RDF::URI] :metadata user supplied metadata, merged on top of extracted metadata. If provided as a URL, Metadata is loade from that location
|
140
|
+
# @option options [RDF::URI] :base
|
141
|
+
# The Base URL to use when expanding the document. This overrides the value of `input` if it is a URL. If not specified and `input` is not an URL, the base URL defaults to the current document URL if in a browser context, or the empty string if there is no document context.
|
142
|
+
# @return [Metadata]
|
143
|
+
def self.for_input(input, options = {})
|
144
|
+
base = options[:base]
|
145
|
+
|
146
|
+
# Use user metadata
|
147
|
+
user_metadata = case options[:metadata]
|
148
|
+
when Metadata then options[:metadata]
|
149
|
+
when Hash
|
150
|
+
Metadata.new(options[:metadata], options.merge(reason: "load user metadata: #{options[:metadata].inspect}"))
|
151
|
+
when String, RDF::URI
|
152
|
+
Metadata.open(options[:metadata], options.merge(filenames: options[:metadata], reason: "load user metadata: #{options[:metadata].inspect}"))
|
153
|
+
end
|
154
|
+
|
155
|
+
found_metadata = nil
|
156
|
+
|
157
|
+
# If user_metadata does not describe input, get the first found from linked-, file-, and directory-specific metadata
|
158
|
+
unless user_metadata.is_a?(Table) || user_metadata.is_a?(TableGroup) && user_metadata.for_table(base)
|
159
|
+
# load link metadata, if available
|
160
|
+
locs = []
|
161
|
+
if input.respond_to?(:links) &&
|
162
|
+
link = input.links.find_link(%w(rel describedby))
|
163
|
+
locs << RDF::URI(base).join(link.href)
|
164
|
+
end
|
165
|
+
|
166
|
+
if base
|
167
|
+
locs += [RDF::URI("#{base}-metadata.json"), RDF::URI(base).join("metadata.json")]
|
168
|
+
end
|
169
|
+
|
170
|
+
locs.each do |loc|
|
171
|
+
found_metadata ||= begin
|
172
|
+
Metadata.open(loc, options.merge(filenames: loc, reason: "load found metadata: #{loc}"))
|
173
|
+
rescue
|
174
|
+
debug("for_input", options) {"failed to load found metadata #{loc}: #{$!}"}
|
175
|
+
nil
|
176
|
+
end
|
177
|
+
end
|
178
|
+
end
|
179
|
+
|
180
|
+
# Return either the merge or user- and found-metadata, any of these, or an empty TableGroup
|
181
|
+
metadata = case
|
182
|
+
when user_metadata && found_metadata then user_metadata.merge(found_metadata)
|
183
|
+
when user_metadata then user_metadata
|
184
|
+
when found_metadata then found_metadata
|
185
|
+
else TableGroup.new({resources: [{url: base}]}, options)
|
186
|
+
end
|
187
|
+
|
188
|
+
# Make TableGroup, if not already
|
189
|
+
metadata.is_a?(TableGroup) ? metadata : metadata.merge(TableGroup.new({}))
|
190
|
+
end
|
191
|
+
|
192
|
+
##
|
193
|
+
# @private
|
194
|
+
def self.new(input, options = {})
|
195
|
+
# Triveal case
|
196
|
+
return input if input.is_a?(Metadata)
|
197
|
+
|
198
|
+
object = case input
|
199
|
+
when Hash then input
|
200
|
+
when IO, StringIO then ::JSON.parse(input.read)
|
201
|
+
else ::JSON.parse(input.to_s)
|
202
|
+
end
|
203
|
+
|
204
|
+
unless options[:parent]
|
205
|
+
# Add context, if not set (which it should be)
|
206
|
+
object['@context'] ||= options.delete(:@context) || options[:context] || 'http://www.w3.org/ns/csvw'
|
207
|
+
end
|
208
|
+
|
209
|
+
klass = case
|
210
|
+
when !self.equal?(RDF::Tabular::Metadata)
|
211
|
+
self # subclasses can be directly constructed without type dispatch
|
212
|
+
else
|
213
|
+
type = if options[:type]
|
214
|
+
type = options[:type].to_sym
|
215
|
+
raise Error, "If provided, type must be one of :TableGroup, :Table, :Transformation, :Schema, :Column, :Dialect]" unless
|
216
|
+
[:TableGroup, :Table, :Transformation, :Schema, :Column, :Dialect].include?(type)
|
217
|
+
type
|
218
|
+
end
|
219
|
+
|
220
|
+
# Figure out type by @type
|
221
|
+
type ||= object['@type']
|
222
|
+
|
223
|
+
# Figure out type by site
|
224
|
+
object_keys = object.keys.map(&:to_s)
|
225
|
+
type ||= case
|
226
|
+
when %w(resources).any? {|k| object_keys.include?(k)} then :TableGroup
|
227
|
+
when %w(dialect tableSchema transformations).any? {|k| object_keys.include?(k)} then :Table
|
228
|
+
when %w(targetFormat scriptFormat source).any? {|k| object_keys.include?(k)} then :Transformation
|
229
|
+
when %w(columns primaryKey foreignKeys urlTemplate).any? {|k| object_keys.include?(k)} then :Schema
|
230
|
+
when %w(name required).any? {|k| object_keys.include?(k)} then :Column
|
231
|
+
when %w(commentPrefix delimiter doubleQuote encoding header headerColumnCount headerRowCount).any? {|k| object_keys.include?(k)} then :Dialect
|
232
|
+
when %w(lineTerminator quoteChar skipBlankRows skipColumns skipInitialSpace skipRows trim).any? {|k| object_keys.include?(k)} then :Dialect
|
233
|
+
end
|
234
|
+
|
235
|
+
case type.to_s.to_sym
|
236
|
+
when :TableGroup then RDF::Tabular::TableGroup
|
237
|
+
when :Table then RDF::Tabular::Table
|
238
|
+
when :Transformation then RDF::Tabular::Transformation
|
239
|
+
when :Schema then RDF::Tabular::Schema
|
240
|
+
when :Column then RDF::Tabular::Column
|
241
|
+
when :Dialect then RDF::Tabular::Dialect
|
242
|
+
else
|
243
|
+
raise Error, "Unkown metadata type: #{type.inspect}"
|
244
|
+
end
|
245
|
+
end
|
246
|
+
|
247
|
+
md = klass.allocate
|
248
|
+
md.send(:initialize, object, options)
|
249
|
+
md
|
250
|
+
end
|
251
|
+
|
252
|
+
##
|
253
|
+
# Create Metadata from IO, Hash or String
|
254
|
+
#
|
255
|
+
# @param [Metadata, Hash, #read] input
|
256
|
+
# @param [Hash{Symbol => Object}] options
|
257
|
+
# @option options [:TableGroup, :Table, :Transformation, :Schema, :Column, :Dialect] :type
|
258
|
+
# Type of schema, if not set, intuited from properties
|
259
|
+
# @option options [JSON::LD::Context] context
|
260
|
+
# Context used for this metadata. Taken from input if not provided
|
261
|
+
# @option options [RDF::URI] :base
|
262
|
+
# The Base URL to use when expanding the document. This overrides the value of `input` if it is a URL. If not specified and `input` is not an URL, the base URL defaults to the current document URL if in a browser context, or the empty string if there is no document context.
|
263
|
+
# @raise [Error]
|
264
|
+
# @return [Metadata]
|
265
|
+
def initialize(input, options = {})
|
266
|
+
@options = options.dup
|
267
|
+
|
268
|
+
# Get context from input
|
269
|
+
# Optimize by using built-in version of context, and just extract @base, @lang
|
270
|
+
@context = case input['@context']
|
271
|
+
when Array then LOCAL_CONTEXT.parse(input['@context'].detect {|e| e.is_a?(Hash)} || {})
|
272
|
+
when Hash then LOCAL_CONTEXT.parse(input['@context'])
|
273
|
+
when nil then nil
|
274
|
+
else LOCAL_CONTEXT
|
275
|
+
end
|
276
|
+
|
277
|
+
reason = @options.delete(:reason)
|
278
|
+
|
279
|
+
@options[:base] ||= @context.base if @context
|
280
|
+
@options[:base] ||= input.base_uri if input.respond_to?(:base_uri)
|
281
|
+
@options[:base] ||= input.filename if input.respond_to?(:filename)
|
282
|
+
@options[:base] = RDF::URI(@options[:base])
|
283
|
+
|
284
|
+
@context.base = @options[:base] if @context
|
285
|
+
|
286
|
+
@options[:depth] ||= 0
|
287
|
+
@filenames = Array(@options[:filenames]).map {|fn| RDF::URI(fn)} if @options[:filenames]
|
288
|
+
@properties = self.class.const_get(:PROPERTIES)
|
289
|
+
@required = self.class.const_get(:REQUIRED)
|
290
|
+
|
291
|
+
@object = {}
|
292
|
+
|
293
|
+
# Parent of this Metadata, if any
|
294
|
+
@parent = @options[:parent]
|
295
|
+
|
296
|
+
depth do
|
297
|
+
# Input was parsed in .new
|
298
|
+
# Metadata is object with symbolic keys
|
299
|
+
input.each do |key, value|
|
300
|
+
key = key.to_sym
|
301
|
+
case key
|
302
|
+
when :columns
|
303
|
+
# An array of template specifications that provide mechanisms to transform the tabular data into other formats
|
304
|
+
object[key] = if value.is_a?(Array) && value.all? {|v| v.is_a?(Hash)}
|
305
|
+
number = 0
|
306
|
+
value.map do |v|
|
307
|
+
number += 1
|
308
|
+
Column.new(v, @options.merge(table: (parent if parent.is_a?(Table)), parent: self, context: nil, number: number))
|
309
|
+
end
|
310
|
+
else
|
311
|
+
# Invalid, but preserve value
|
312
|
+
value
|
313
|
+
end
|
314
|
+
when :datatype
|
315
|
+
# If in object form, normalize keys to symbols
|
316
|
+
object[key] = case value
|
317
|
+
when Hash
|
318
|
+
value.inject({}) {|memo, (k,v)| memo[k.to_sym] = v; memo}
|
319
|
+
else
|
320
|
+
value
|
321
|
+
end
|
322
|
+
when :dialect
|
323
|
+
# If provided, dialect provides hints to processors about how to parse the referenced file to create a tabular data model.
|
324
|
+
object[key] = case value
|
325
|
+
when String then Dialect.open(base.join(value), @options.merge(parent: self, context: nil))
|
326
|
+
when Hash then Dialect.new(value, @options.merge(parent: self, context: nil))
|
327
|
+
else
|
328
|
+
# Invalid, but preserve value
|
329
|
+
value
|
330
|
+
end
|
331
|
+
@type ||= :Table
|
332
|
+
when :resources
|
333
|
+
# An array of table descriptions for the tables in the group.
|
334
|
+
object[key] = if value.is_a?(Array) && value.all? {|v| v.is_a?(Hash)}
|
335
|
+
value.map {|v| Table.new(v, @options.merge(parent: self, context: nil))}
|
336
|
+
else
|
337
|
+
# Invalid, but preserve value
|
338
|
+
value
|
339
|
+
end
|
340
|
+
when :tableSchema
|
341
|
+
# An object property that provides a schema description as described in section 3.8 Schemas, for all the tables in the group. This may be provided as an embedded object within the JSON metadata or as a URL reference to a separate JSON schema document
|
342
|
+
# SPEC SUGGESTION: when loading a remote schema, assign @id from it's location if not already set
|
343
|
+
object[key] = case value
|
344
|
+
when String
|
345
|
+
link = base.join(value).to_s
|
346
|
+
s = Schema.open(link, @options.merge(parent: self, context: nil))
|
347
|
+
s[:@id] ||= link
|
348
|
+
s
|
349
|
+
when Hash then Schema.new(value, @options.merge(parent: self, context: nil))
|
350
|
+
else
|
351
|
+
# Invalid, but preserve value
|
352
|
+
value
|
353
|
+
end
|
354
|
+
when :transformations
|
355
|
+
# An array of template specifications that provide mechanisms to transform the tabular data into other formats
|
356
|
+
object[key] = if value.is_a?(Array) && value.all? {|v| v.is_a?(Hash)}
|
357
|
+
value.map {|v| Transformation.new(v, @options.merge(parent: self, context: nil))}
|
358
|
+
else
|
359
|
+
# Invalid, but preserve value
|
360
|
+
value
|
361
|
+
end
|
362
|
+
when :url
|
363
|
+
# URL of CSV relative to metadata
|
364
|
+
object[:url] = value
|
365
|
+
@url = base.join(value)
|
366
|
+
@context.base = @url if @context # Use as base for expanding IRIs
|
367
|
+
when :@id
|
368
|
+
# metadata identifier
|
369
|
+
object[:@id] = value
|
370
|
+
@id = base.join(value)
|
371
|
+
else
|
372
|
+
if @properties.has_key?(key)
|
373
|
+
self.send("#{key}=".to_sym, value)
|
374
|
+
else
|
375
|
+
object[key] = value
|
376
|
+
end
|
377
|
+
end
|
378
|
+
end
|
379
|
+
end
|
380
|
+
|
381
|
+
# Set type from @type, if present and not otherwise defined
|
382
|
+
@type ||= object[:@type].to_sym if object[:@type]
|
383
|
+
if reason
|
384
|
+
debug("md#initialize") {reason}
|
385
|
+
debug("md#initialize") {"filenames: #{filenames}"}
|
386
|
+
debug("md#initialize") {"#{inspect}, parent: #{!@parent.nil?}, context: #{!@context.nil?}"} unless is_a?(Dialect)
|
387
|
+
end
|
388
|
+
end
|
389
|
+
|
390
|
+
# Setters
|
391
|
+
INHERITED_PROPERTIES.keys.each do |a|
|
392
|
+
define_method("#{a}=".to_sym) do |value|
|
393
|
+
object[a] = value.to_s =~ /^\d+/ ? value.to_i : value
|
394
|
+
end
|
395
|
+
end
|
396
|
+
|
397
|
+
# Context used for this metadata. Use parent's if not defined on self.
|
398
|
+
# @return [JSON::LD::Context]
|
399
|
+
def context
|
400
|
+
@context || (parent.context if parent)
|
401
|
+
end
|
402
|
+
|
403
|
+
# Treat `dialect` similar to an inherited property, but merge together values from Table and TableGroup
|
404
|
+
# @return [Dialect]
|
405
|
+
def dialect
|
406
|
+
@dialect ||= case
|
407
|
+
when object[:dialect] then object[:dialect]
|
408
|
+
when parent then parent.dialect
|
409
|
+
when is_a?(Table) || is_a?(TableGroup)
|
410
|
+
d = Dialect.new({}, @options.merge(parent: self, context: nil))
|
411
|
+
self.dialect = d unless self.parent
|
412
|
+
d
|
413
|
+
else
|
414
|
+
raise Error, "Can't access dialect from #{self.class} without a parent"
|
415
|
+
end
|
416
|
+
end
|
417
|
+
|
418
|
+
# Set new dialect
|
419
|
+
# @return [Dialect]
|
420
|
+
def dialect=(value)
|
421
|
+
# Clear cached dialect information from children
|
422
|
+
object.values.each do |v|
|
423
|
+
case v
|
424
|
+
when Metadata then v.object.delete(:dialect)
|
425
|
+
when Array then v.each {|vv| vv.object.delete(:dialect) if vv.is_a?(Metadata)}
|
426
|
+
end
|
427
|
+
end
|
428
|
+
|
429
|
+
if value.is_a?(Hash)
|
430
|
+
@dialect = object[:dialect] = Dialect.new(value)
|
431
|
+
elsif value
|
432
|
+
# Remember invalid dialect for validation purposes
|
433
|
+
object[:dialect] = value
|
434
|
+
else
|
435
|
+
object.delete(:dialect)
|
436
|
+
@dialect = nil
|
437
|
+
end
|
438
|
+
end
|
439
|
+
|
440
|
+
# Type of this Metadata
|
441
|
+
# @return [:TableGroup, :Table, :Transformation, :Schema, :Column]
|
442
|
+
def type; self.class.name.split('::').last.to_sym; end
|
443
|
+
|
444
|
+
# Base URL of metadata
|
445
|
+
# @return [RDF::URI]
|
446
|
+
def base; @options[:base]; end
|
447
|
+
|
448
|
+
##
|
449
|
+
# Do we have valid metadata?
|
450
|
+
def valid?
|
451
|
+
validate!
|
452
|
+
true
|
453
|
+
rescue
|
454
|
+
false
|
455
|
+
end
|
456
|
+
|
457
|
+
##
|
458
|
+
# Validation errors
|
459
|
+
# @return [Array<String>]
|
460
|
+
def errors
|
461
|
+
validate! && []
|
462
|
+
rescue Error => e
|
463
|
+
e.message.split("\n")
|
464
|
+
end
|
465
|
+
|
466
|
+
##
|
467
|
+
# Validate metadata, raising an error containing all errors detected during validation
|
468
|
+
# @raise [Error] Raise error if metadata has any unexpected properties
|
469
|
+
# @return [self]
|
470
|
+
def validate!
|
471
|
+
expected_props, required_props = @properties.keys, @required
|
472
|
+
errors = []
|
473
|
+
|
474
|
+
unless is_a?(Dialect) || is_a?(Transformation)
|
475
|
+
expected_props = expected_props + INHERITED_PROPERTIES.keys
|
476
|
+
end
|
477
|
+
|
478
|
+
# It has only expected properties (exclude metadata)
|
479
|
+
check_keys = object.keys - [:"@id", :"@context"]
|
480
|
+
check_keys = check_keys.reject {|k| k.to_s.include?(':')} unless is_a?(Dialect)
|
481
|
+
errors << "#{type} has unexpected keys: #{(check_keys - expected_props).map(&:to_s)}" unless check_keys.all? {|k| expected_props.include?(k)}
|
482
|
+
|
483
|
+
# It has required properties
|
484
|
+
errors << "#{type} missing required keys: #{(required_props & check_keys).map(&:to_s)}" unless (required_props & check_keys) == required_props
|
485
|
+
|
486
|
+
# Every property is valid
|
487
|
+
object.keys.each do |key|
|
488
|
+
value = object[key]
|
489
|
+
case key
|
490
|
+
when :aboutUrl, :datatype, :default, :lang, :null, :ordered, :propertyUrl, :separator, :textDirection, :valueUrl
|
491
|
+
valid_inherited_property?(key, value) {|m| errors << m}
|
492
|
+
when :columns
|
493
|
+
if value.is_a?(Array) && value.all? {|v| v.is_a?(Column)}
|
494
|
+
value.each do |v|
|
495
|
+
begin
|
496
|
+
v.validate!
|
497
|
+
rescue Error => e
|
498
|
+
errors << e.message
|
499
|
+
end
|
500
|
+
end
|
501
|
+
column_names = value.map(&:name)
|
502
|
+
errors << "#{type} has invalid property '#{key}': must have unique names: #{column_names.inspect}" unless column_names.uniq == column_names
|
503
|
+
else
|
504
|
+
errors << "#{type} has invalid property '#{key}': expected array of Columns"
|
505
|
+
end
|
506
|
+
when :commentPrefix, :delimiter, :quoteChar
|
507
|
+
unless value.is_a?(String) && value.length == 1
|
508
|
+
errors << "#{type} has invalid property '#{key}': #{value.inspect}, expected a single character string"
|
509
|
+
end
|
510
|
+
when :format, :lineTerminator, :uriTemplate
|
511
|
+
unless value.is_a?(String)
|
512
|
+
errors << "#{type} has invalid property '#{key}': #{value.inspect}, expected a string"
|
513
|
+
end
|
514
|
+
when :dialect
|
515
|
+
unless value.is_a?(Dialect)
|
516
|
+
errors << "#{type} has invalid property '#{key}': expected a Dialect Description"
|
517
|
+
end
|
518
|
+
begin
|
519
|
+
value.validate! if value
|
520
|
+
rescue Error => e
|
521
|
+
errors << e.message
|
522
|
+
end
|
523
|
+
when :doubleQuote, :header, :required, :skipInitialSpace, :skipBlankRows, :suppressOutput, :virtual
|
524
|
+
unless value.is_a?(TrueClass) || value.is_a?(FalseClass)
|
525
|
+
errors << "#{type} has invalid property '#{key}': #{value}, expected boolean true or false"
|
526
|
+
end
|
527
|
+
when :encoding
|
528
|
+
unless (Encoding.find(value) rescue false)
|
529
|
+
errors << "#{type} has invalid property '#{key}': #{value.inspect}, expected a valid encoding"
|
530
|
+
end
|
531
|
+
when :foreignKeys
|
532
|
+
# An array of foreign key definitions that define how the values from specified columns within this table link to rows within this table or other tables. A foreign key definition is a JSON object with the properties:
|
533
|
+
value.is_a?(Array) && value.each do |fk|
|
534
|
+
if fk.is_a?(Hash)
|
535
|
+
columnReference, reference = fk['columnReference'], fk['reference']
|
536
|
+
errors << "#{type} has invalid property '#{key}': missing columnReference and reference" unless columnReference && reference
|
537
|
+
errors << "#{type} has invalid property '#{key}': has extra entries #{fk.keys.inspect}" unless fk.keys.length == 2
|
538
|
+
|
539
|
+
# Verify that columns exist in this schema
|
540
|
+
Array(columnReference).each do |k|
|
541
|
+
errors << "#{type} has invalid property '#{key}': columnReference not found #{k}" unless self.columns.any? {|c| c.name == k}
|
542
|
+
end
|
543
|
+
|
544
|
+
if reference.is_a?(Hash)
|
545
|
+
ref_cols = reference['columnReference']
|
546
|
+
schema = if reference.has_key?('resource')
|
547
|
+
if reference.has_key?('schemaReference')
|
548
|
+
errors << "#{type} has invalid property '#{key}': reference has a schemaReference: #{reference.inspect}"
|
549
|
+
end
|
550
|
+
# resource is the URL of a Table in the TableGroup
|
551
|
+
ref = base.join(reference['resource']).to_s
|
552
|
+
table = root.is_a?(TableGroup) && root.resources.detect {|t| t.url == ref}
|
553
|
+
errors << "#{type} has invalid property '#{key}': table referenced by #{ref} not found" unless table
|
554
|
+
table.tableSchema if table
|
555
|
+
elsif reference.has_key?('schemaReference')
|
556
|
+
# resource is the @id of a Schema in the TableGroup
|
557
|
+
ref = base.join(reference['schemaReference']).to_s
|
558
|
+
tables = root.is_a?(TableGroup) ? root.resources.select {|t| t.tableSchema[:@id] == ref} : []
|
559
|
+
case tables.length
|
560
|
+
when 0
|
561
|
+
errors << "#{type} has invalid property '#{key}': schema referenced by #{ref} not found"
|
562
|
+
nil
|
563
|
+
when 1
|
564
|
+
tables.first.tableSchema
|
565
|
+
else
|
566
|
+
errors << "#{type} has invalid property '#{key}': multiple schemas found from #{ref}"
|
567
|
+
nil
|
568
|
+
end
|
569
|
+
end
|
570
|
+
|
571
|
+
if schema
|
572
|
+
# ref_cols must exist in schema
|
573
|
+
Array(ref_cols).each do |k|
|
574
|
+
errors << "#{type} has invalid property '#{key}': column reference not found #{k}" unless schema.columns.any? {|c| c.name == k}
|
575
|
+
end
|
576
|
+
end
|
577
|
+
else
|
578
|
+
errors << "#{type} has invalid property '#{key}': reference must be an object #{reference.inspect}"
|
579
|
+
end
|
580
|
+
else
|
581
|
+
errors << "#{type} has invalid property '#{key}': reference must be an object: #{reference.inspect}"
|
582
|
+
end
|
583
|
+
end
|
584
|
+
when :headerColumnCount, :headerRowCount, :skipColumns, :skipRows
|
585
|
+
unless value.is_a?(Numeric) && value.integer? && value > 0
|
586
|
+
errors << "#{type} has invalid property '#{key}': #{value.inspect} must be a positive integer"
|
587
|
+
end
|
588
|
+
when :length, :minLength, :maxLength
|
589
|
+
unless value.is_a?(Numeric) && value.integer? && value > 0
|
590
|
+
errors << "#{type} has invalid property '#{key}': #{value.inspect}, expected a positive integer"
|
591
|
+
end
|
592
|
+
unless key == :length || value != object[:length]
|
593
|
+
# Applications must raise an error if length, maxLength or minLength are specified and the cell value is not a list (ie separator is not specified), a string or one of its subtypes, or a binary value.
|
594
|
+
errors << "#{type} has invalid property '#{key}': Use of both length and #{key} requires they be equal"
|
595
|
+
end
|
596
|
+
when :minimum, :maximum, :minInclusive, :maxInclusive, :minExclusive, :maxExclusive
|
597
|
+
unless value.is_a?(Numeric) ||
|
598
|
+
RDF::Literal::Date.new(value.to_s).valid? ||
|
599
|
+
RDF::Literal::Time.new(value.to_s).valid? ||
|
600
|
+
RDF::Literal::DateTime.new(value.to_s).valid?
|
601
|
+
errors << "#{type} has invalid property '#{key}': #{value}, expected numeric or valid date/time"
|
602
|
+
end
|
603
|
+
when :name
|
604
|
+
unless value.is_a?(String) && name.match(NAME_SYNTAX)
|
605
|
+
errors << "#{type} has invalid property '#{key}': #{value}, expected proper name format"
|
606
|
+
end
|
607
|
+
when :notes
|
608
|
+
unless value.is_a?(Hash) || value.is_a?(Array)
|
609
|
+
errors << "#{type} has invalid property '#{key}': #{value}, Object or Array"
|
610
|
+
end
|
611
|
+
begin
|
612
|
+
normalize_jsonld(key, value)
|
613
|
+
rescue Error => e
|
614
|
+
errors << "#{type} has invalid content '#{key}': #{e.message}"
|
615
|
+
end
|
616
|
+
when :primaryKey
|
617
|
+
# A column reference property that holds either a single reference to a column description object or an array of references.
|
618
|
+
Array(value).each do |k|
|
619
|
+
errors << "#{type} has invalid property '#{key}': column reference not found #{k}" unless self.columns.any? {|c| c.name == k}
|
620
|
+
end
|
621
|
+
when :resources
|
622
|
+
if value.is_a?(Array) && value.all? {|v| v.is_a?(Table)}
|
623
|
+
value.each do |t|
|
624
|
+
begin
|
625
|
+
t.validate!
|
626
|
+
rescue Error => e
|
627
|
+
errors << e.message
|
628
|
+
end
|
629
|
+
end
|
630
|
+
else
|
631
|
+
errors << "#{type} has invalid property '#{key}': expected array of Tables"
|
632
|
+
end
|
633
|
+
when :scriptFormat, :targetFormat
|
634
|
+
unless RDF::URI(value).valid?
|
635
|
+
errors << "#{type} has invalid property '#{key}': #{value.inspect}, expected valid absolute URL"
|
636
|
+
end
|
637
|
+
when :source
|
638
|
+
unless %w(json rdf).include?(value) || value.nil?
|
639
|
+
errors << "#{type} has invalid property '#{key}': #{value.inspect}, expected json or rdf"
|
640
|
+
end
|
641
|
+
when :tableDirection
|
642
|
+
unless %w(rtl ltr default).include?(value)
|
643
|
+
errors << "#{type} has invalid property '#{key}': #{value.inspect}, expected rtl, ltr, or default"
|
644
|
+
end
|
645
|
+
when :tableSchema
|
646
|
+
if value.is_a?(Schema)
|
647
|
+
begin
|
648
|
+
value.validate!
|
649
|
+
rescue Error => e
|
650
|
+
errors << e.message
|
651
|
+
end
|
652
|
+
else
|
653
|
+
errors << "#{type} has invalid property '#{key}': expected Schema"
|
654
|
+
end
|
655
|
+
when :transformations
|
656
|
+
if value.is_a?(Array) && value.all? {|v| v.is_a?(Transformation)}
|
657
|
+
value.each do |t|
|
658
|
+
begin
|
659
|
+
t.validate!
|
660
|
+
rescue Error => e
|
661
|
+
errors << e.message
|
662
|
+
end
|
663
|
+
end
|
664
|
+
else
|
665
|
+
errors << "#{type} has invalid property '#{key}': expected array of Transformations"
|
666
|
+
end
|
667
|
+
when :title
|
668
|
+
valid_natural_language_property?(:title, value) {|m| errors << m}
|
669
|
+
when :trim
|
670
|
+
unless %w(true false 1 0 start end).include?(value.to_s.downcase)
|
671
|
+
errors << "#{type} has invalid property '#{key}': #{value.inspect}, expected true, false, 1, 0, start or end"
|
672
|
+
end
|
673
|
+
when :url
|
674
|
+
unless @url.valid?
|
675
|
+
errors << "#{type} has invalid property '#{key}': #{value.inspect}, expected valid absolute URL"
|
676
|
+
end
|
677
|
+
when :@id, :@context
|
678
|
+
# Skip these
|
679
|
+
when :@type
|
680
|
+
unless value.to_sym == type
|
681
|
+
errors << "#{type} has invalid property '#{key}': #{value.inspect}, expected #{type}"
|
682
|
+
end
|
683
|
+
when ->(k) {key.to_s.include?(':')}
|
684
|
+
begin
|
685
|
+
normalize_jsonld(key, value)
|
686
|
+
rescue Error => e
|
687
|
+
errors << "#{type} has invalid content '#{key}': #{e.message}"
|
688
|
+
end
|
689
|
+
else
|
690
|
+
errors << "#{type} has invalid property '#{key}': unsupported property"
|
691
|
+
end
|
692
|
+
end
|
693
|
+
|
694
|
+
raise Error, errors.join("\n") unless errors.empty?
|
695
|
+
self
|
696
|
+
end
|
697
|
+
|
698
|
+
##
# Determine if a natural language property is valid.
#
# A valid natural language property is a Hash whose values are each a
# String or an array of Strings (keys are assumed to be language codes).
#
# @param [Symbol] key property name, used only in the error message
# @param [Object] value value to validate
# @yield message error message when the value is invalid
# @return [Boolean] true when valid, false otherwise
def valid_natural_language_property?(key, value)
  if value.is_a?(Hash) && value.all? {|k, v| Array(v).all? {|vv| vv.is_a?(String)}}
    # FIX: the previous implementation fell off the `unless` and returned
    # nil for valid input, so the predicate never returned a truthy value.
    true
  else
    yield "#{type} has invalid property '#{key}': #{value.inspect}, expected a valid natural language property" if block_given?
    false
  end
end
|
709
|
+
|
710
|
+
##
# Determine if an inherited property is valid.
#
# Performs two checks: a type/shape check on the value itself, then a
# compatibility check against any value inherited from the parent.
#
# @param [Symbol] key property name
# @param [String, Array<String>, Hash{String => String}] value
# @yield message error message when invalid
# @return [Boolean] true when valid, false otherwise
def valid_inherited_property?(key, value)
  # Value of the same property on the parent (if any), for compatibility checks.
  pv = parent.send(key) if parent

  error = case key
  when :aboutUrl, :default, :propertyUrl, :valueUrl
    "string" unless value.is_a?(String)
  when :datatype
    # Normalization usually redundant
    dt = normalize_datatype(value)
    # FIXME: support arrays of datatypes?
    "valid datatype" unless DATATYPES.keys.map(&:to_s).include?(dt[:base]) || RDF::URI(dt[:base]).absolute?
  when :lang
    "valid BCP47 language tag" unless BCP47::Language.identify(value.to_s)
  when :null
    # Must be a string or array of strings (not a Hash).
    "string or array of strings" unless !value.is_a?(Hash) && Array(value).all? {|v| v.is_a?(String)}
  when :ordered
    "boolean" unless value.is_a?(TrueClass) || value.is_a?(FalseClass)
  when :separator
    "single character" unless value.nil? || value.is_a?(String) && value.length == 1
  when :textDirection
    # FIX: was `%(rtl ltr)`, the single String "rtl ltr", so any substring
    # of it (e.g. "tl l" or "r") was accepted. %w(rtl ltr) checks exact
    # membership in the two allowed values.
    "rtl or ltr" unless %w(rtl ltr).include?(value)
  end ||
    case key
    # Compatibility with the inherited (parent) value.
    when :aboutUrl, :propertyUrl, :valueUrl
      # No restrictions
    when :default, :ordered, :separator, :textDirection
      # Compatible only if identical to the parent's value.
      "same as that defined on parent" if pv && pv != value
    when :datatype
      if pv
        # Normalization usually redundant
        dt = normalize_datatype(value)
        pvdt = normalize_datatype(pv)
        vl = RDF::Literal.new("", datatype: DATATYPES[dt[:base].to_sym])
        pvvl = RDF::Literal.new("", datatype: DATATYPES[pvdt[:base].to_sym])
        # Must be a subclass of some type defined on parent.
        "compatible datatype of that defined on parent" unless vl.is_a?(pvvl.class)
      end
    when :lang
      "lang expected to restrict #{pv}" if pv && !value.start_with?(pv)
    when :null
      # Must be a subset of the inherited value.
      "subset of that defined on parent" if pv && (Array(value) & Array(pv)) != Array(value)
    end

  if error
    yield "#{type} has invalid property '#{key}' ('#{value}'): expected #{error}"
    false
  else
    true
  end
end
|
768
|
+
|
769
|
+
##
# Yield each data row from the input file.
#
# Skips the configured number of leading rows (skipRows plus header rows),
# records embedded comment rows as rdfs:comment statements, optionally
# skips blank rows, and yields a Row for everything else.
#
# @param [:read] input
# @yield [Row]
def each_row(input)
  csv = ::CSV.new(input, csv_options)
  # Rows to discard before any data: skipRows plus the header rows.
  skipped = dialect.skipRows.to_i + dialect.headerRowCount
  number = 0
  skipped.times { csv.shift }
  csv.each do |data|
    if dialect.commentPrefix && data.first.to_s.start_with?(dialect.commentPrefix)
      # Embedded comment row: strip the prefix character, record the text
      # on self and emit it as an rdfs:comment statement.
      comment = data.join(' ')[1..-1].strip
      unless comment.empty?
        (self["rdfs:comment"] ||= []) << comment
        yield RDF::Statement.new(nil, RDF::RDFS.comment, RDF::Literal(comment))
      end
      skipped += 1
    elsif dialect.skipBlankRows && data.join("").strip.empty?
      # Blank row: counts as skipped, yields nothing.
      skipped += 1
    else
      number += 1
      yield Row.new(data, self, number, number + skipped)
    end
  end
end
|
797
|
+
|
798
|
+
##
# Return JSON-friendly or yield RDF for common properties
#
# @overload common_properties(subject, property, value, &block)
#   Yield RDF statements
#   @param [RDF::Resource] subject
#   @param [String] property
#   @param [String, Hash{String => Object}, Array<String, Hash{String => Object}>] value
#   @yield property, value
#   @yieldparam [String] property as a PName or URL
#   @yieldparam [RDF::Statement] statement
#
# @overload common_properties(subject, property, value)
#   Return value with expanded values and node references flattened
#   @return [String, Hash{String => Object}, Array<String, Hash{String => Object}>] simply extracted from metadata
def common_properties(subject, property, value, &block)
  if block_given?
    property = context.expand_iri(property.to_s, vocab: true) unless property.is_a?(RDF::URI)
    case value
    when Array
      value.each {|v| common_properties(subject, property, v, &block)}
    when Hash
      if value['@value']
        # Value object: emit a literal with optional datatype or language.
        dt = RDF::URI(context.expand_iri(value['@type'], vocab: true)) if value['@type']
        lit = RDF::Literal(value['@value'], language: value['@language'], datatype: dt)
        block.call(RDF::Statement.new(subject, property, lit))
      else
        # value MUST be a node object, establish a new subject from `@id`
        s2 = value.has_key?('@id') ? context.expand_iri(value['@id']) : RDF::Node.new

        # Generate a triple
        block.call(RDF::Statement.new(subject, property, s2))

        # Generate types
        Array(value['@type']).each do |t|
          block.call(RDF::Statement.new(s2, RDF.type, context.expand_iri(t, vocab: true)))
        end

        # Generate triples for all other properties
        value.each do |prop, val|
          next if prop.to_s.start_with?('@')
          common_properties(s2, prop, val, &block)
        end
      end
    else
      # Value is a primitive JSON value.
      # FIX: removed an unused local (`lit`) that duplicated this literal.
      block.call(RDF::Statement.new(subject, property, RDF::Literal(value)))
    end
  else
    case value
    when Array
      value.map {|v| common_properties(subject, property, v)}
    when Hash
      if value['@value']
        value['@value']
      elsif value.keys == %w(@id) && value['@id']
        value['@id']
      else
        nv = {}
        value.each do |k, v|
          case k.to_s
          # FIX: was `context.expand_iri(v['@id'])` — here `v` is already the
          # IRI String (the value of the '@id' key), and String#['@id'] would
          # return nil or an accidental substring, not the IRI.
          when '@id' then nv[k.to_s] = context.expand_iri(v).to_s
          when '@type' then nv[k.to_s] = v
          else nv[k.to_s] = common_properties(nil, k, v)
          end
        end
        nv
      end
    else
      value
    end
  end
end
|
872
|
+
|
873
|
+
# Does the Metadata have any common properties?
# A common (annotation) property is any key containing ':' (e.g. "dc:title").
# @return [Boolean]
def has_annotations?
  object.each_key.any? {|k| k.to_s.include?(':')}
end
|
878
|
+
|
879
|
+
# Merge metadata into a copy of this metadata.
#
# If the top-level object of any of the metadata files (including self) is a
# table description, it is treated as if it were a table group description
# containing that single table description, so the merge is always performed
# on like-typed documents.
#
# @param [Array<Metadata>] metadata other metadata documents to merge in
# @raise [RuntimeError] if a document cannot be merged with this type
# @return [Metadata] the merged copy; self when metadata is empty
def merge(*metadata)
  return self if metadata.empty?
  # Promote self to a TableGroup when it is a bare Table without a parent.
  this = case self
  when TableGroup then self.dup
  when Table
    if self.is_a?(Table) && self.parent
      self.parent
    else
      content = {"@type" => "TableGroup", "resources" => [self]}
      content['@context'] = object.delete(:@context) if object[:@context]
      # NOTE(review): `ctx` is captured but never used below — confirm removable.
      ctx = @context
      self.remove_instance_variable(:@context) if self.instance_variables.include?(:@context)
      tg = TableGroup.new(content, filenames: @filenames, base: base)
      @parent = tg # Link from parent
      tg
    end
  else self.dup
  end

  # Merge all passed metadata into this, promoting Tables the same way.
  merged = metadata.reduce(this) do |memo, md|
    md = case md
    when TableGroup then md
    when Table
      if md.parent
        md.parent
      else
        content = {"@type" => "TableGroup", "resources" => [md]}
        # NOTE(review): `ctx` is captured but never used below — confirm removable.
        ctx = md.context
        content['@context'] = md.object.delete(:@context) if md.object[:@context]
        md.remove_instance_variable(:@context) if md.instance_variables.include?(:@context)
        tg = TableGroup.new(content, filenames: md.filenames, base: md.base)
        md.instance_variable_set(:@parent, tg) # Link from parent
        tg
      end
    else
      md
    end

    # Only like-typed documents can be merged.
    raise "Can't merge #{memo.class} with #{md.class}" unless memo.class == md.class

    memo.merge!(md)
  end

  # Set @context of merged
  merged[:@context] = 'http://www.w3.org/ns/csvw'
  merged
end
|
931
|
+
|
932
|
+
# Merge metadata (B) into self (A), per the CSVW metadata merge algorithm.
# The merge strategy is driven by the declared kind of each property
# (:array, :object, :natural_language, or atomic).
#
# @param [Metadata] metadata document to merge into self (must be same class)
# @raise [RuntimeError] when the classes differ
# @raise [Error] on incompatible columns
# @return [self]
def merge!(metadata)
  raise "Merging non-equivalent metadata types: #{self.class} vs #{metadata.class}" unless self.class == metadata.class

  depth do
    # Merge filenames
    if @filenames || metadata.filenames
      @filenames = (Array(@filenames) | Array(metadata.filenames)).uniq
    end

    # Normalize A (this) and B (metadata) values into normal form
    self.normalize!
    metadata = metadata.dup.normalize!

    @dialect = nil # So that it is re-built when needed
    # Merge each property from metadata into self
    metadata.each do |key, value|
      case @properties[key]
      when :array
        # Array properties merge per-property; coerce A's value to an array first.
        object[key] = case object[key]
        when nil then []
        when Hash then [object[key]] # Shouldn't happen if well formed
        else object[key]
        end

        value = [value] if value.is_a?(Hash)
        case key
        when :notes
          # notes: values from A followed by values from B.
          a = object[key].is_a?(Array) ? object[key] : [object[key]].compact
          b = value.is_a?(Array) ? value : [value]
          object[key] = a + b
        when :resources
          # Each table description in B merges into the A table with the same
          # url, or is appended (re-parented to self) when there is no match.
          value.each do |tb|
            if ta = object[key].detect {|e| e.url == tb.url}
              debug("merge!: resources") {"TA: #{ta.inspect}, TB: #{tb.inspect}"}
              ta.merge!(tb)
            else
              tb = tb.dup
              tb.instance_variable_set(:@parent, self)
              debug("merge!: resources") {"add TB: #{tb.inspect}"}
              object[key] << tb
            end
          end
        when :transformations
          # SPEC CONFUSION: differing transformations with same @id?
          # Merge into the A template with matching targetFormat+scriptFormat,
          # otherwise append (re-parented to self).
          value.each do |t|
            if ta = object[key].detect {|e| e.targetFormat == t.targetFormat && e.scriptFormat == t.scriptFormat}
              ta.merge!(t)
            else
              t = t.dup
              t.instance_variable_set(:@parent, self) if self
              object[key] << t
            end
          end
        when :columns
          # Columns merge positionally; names/titles at the same index must
          # intersect case-insensitively, else it is an error.
          Array(value).each_with_index do |cb, index|
            ca = object[key][index] || {}
            va = ([ca[:name]] + (ca[:title] || {}).values.flatten).compact.map(&:downcase)
            vb = ([cb[:name]] + (cb[:title] || {}).values.flatten).compact.map(&:downcase)
            if !(va & vb).empty?
              debug("merge!: columns") {"index: #{index}, va: #{va}, vb: #{vb}"}
              # Non-empty case-insensitive intersection: import B into A's column.
              ca.merge!(cb)
            elsif ca.nil? && cb.virtual
              # NOTE(review): `ca` can never be nil here (it defaults to {}
              # above), so this branch looks unreachable — confirm whether the
              # guard should test `object[key][index].nil?` instead.
              debug("merge!: columns") {"index: #{index}, virtual"}
              cb = cb.dup
              cb.instance_variable_set(:@parent, self) if self
              object[key][index] = cb
            else
              debug("merge!: columns") {"index: #{index}, ignore"}
              raise Error, "Columns at same index don't match: #{ca.to_json} vs. #{cb.to_json}"
            end
          end
          # The number of non-virtual columns in A and B MUST be the same
          nA = object[key].reject(&:virtual).length
          nB = Array(value).reject(&:virtual).length
          raise Error, "Columns must have the same number of non-virtual columns" unless nA == nB || nB == 0
        when :foreignKeys
          # Append foreign key definitions from B not already present in A.
          # SPEC CONFUSION: If definitions vary only a little, they should probably be merged (e.g. common properties).
          object[key] = object[key] + (metadata[key] - object[key])
        end
      when :object
        case key
        when :notes
          # Array-accepting object property: A's values then B's new ones.
          a = object[key] || []
          object[key] = (a + value).uniq
        else
          # Single-object property.
          if object[key].is_a?(String) || value.is_a?(String)
            # A string on either side: A's value wins when present.
            object[key] ||= value
          elsif object[key].is_a?(Metadata)
            # Both objects: recursive merge.
            object[key].merge!(value)
          elsif object[key].is_a?(Hash)
            # Both plain hashes: shallow merge.
            object[key].merge!(value)
          else
            # A has no value: adopt B's (re-parented to self).
            value = value.dup
            value.instance_variable_set(:@parent, self) if self
            object[key] = value
          end
        end
      when :natural_language
        # Language-map merge: for each language, A's values followed by B's
        # values not already present.
        a = object[key] || {}
        b = value
        debug("merge!: natural_language") {
          "A: #{a.inspect}, B: #{b.inspect}"
        }
        b.each do |k, v|
          a[k] = Array(a[k]) + (Array(b[k]) - Array(a[k]))
        end
        # Eliminate titles with no language where the same string exists with a language.
        if a.has_key?("und")
          a["und"] = a["und"].reject do |v|
            a.any? {|lang, values| lang != 'und' && values.include?(v)}
          end
          a.delete("und") if a["und"].empty?
        end
        object[key] = a
      when ->(k) {key == :@id}
        # A's @id wins; also cache it on the instance.
        object[key] ||= value
        @id ||= metadata.id
      else
        # Otherwise, the value from A overrides that from B
        object[key] ||= value
      end
    end
  end

  debug("merge!") {self.inspect}
  self
end
|
1078
|
+
|
1079
|
+
# Debug representation: the class name followed by the underlying Hash.
def inspect
  "#{self.class.name}#{object.inspect}"
end
|
1082
|
+
|
1083
|
+
# Hash-like access is delegated to the underlying @object Hash.
def [](key)
  object[key]
end

def []=(key, value)
  object[key] = value
end

def each(&block)
  object.each(&block)
end

# Equal when the underlying Hashes are equal; accepts either a raw Hash
# or another Metadata as the comparand.
def ==(other)
  object == (other.is_a?(Hash) ? other : other.object)
end

def to_json(args = nil)
  object.to_json(args)
end
|
1091
|
+
|
1092
|
+
##
# Normalize this metadata object in place, per property kind:
# common/notes properties become JSON-LD normal form, links are resolved
# against base, natural-language values become language maps, and
# datatypes become their Hash form.
#
# @raise [Error] via normalize_jsonld for invalid JSON-LD content
# @return [self]
def normalize!
  self.each do |key, value|
    self[key] = case @properties[key] || INHERITED_PROPERTIES[key]
    when ->(k) {key.to_s.include?(':') || key == :notes}
      # Common property (prefixed key) or notes: JSON-LD normalization.
      normalize_jsonld(key, value)
    when ->(k) {key.to_s == '@context'}
      # Context is always pinned to the CSVW context URL.
      "http://www.w3.org/ns/csvw"
    when :link
      # Resolve link properties against the document base.
      base.join(value).to_s
    when :array
      value = [value] unless value.is_a?(Array)
      value.map do |v|
        if v.is_a?(Metadata)
          v.normalize!
        elsif v.is_a?(Hash) && (ref = v["reference"]).is_a?(Hash)
          # SPEC SUGGESTION: special case for foreignKeys — resolve the
          # reference targets against base.
          ref["resource"] = base.join(ref["resource"]).to_s if ref["resource"]
          ref["schemaReference"] = base.join(ref["schemaReference"]).to_s if ref["schemaReference"]
          v
        else
          v
        end
      end
    when :object
      case value
      when Metadata then value.normalize!
      when String
        # Load referenced JSON document
        # (This is done when objects are loaded in this implementation)
        raise "unexpected String value of property '#{key}': #{value}"
      else value
      end
    when :natural_language
      # Coerce to a language map keyed by default language (or 'und').
      value.is_a?(Hash) ? value : {(context.default_language || 'und') => Array(value)}
    when :atomic
      case key
      when :datatype then normalize_datatype(value)
      else value
      end
    else
      value
    end
  end
  self
end
|
1141
|
+
|
1142
|
+
##
# Normalize a datatype to its Object/Hash representation.
#
# A bare value becomes `{base: value}`; recognized keys are symbolized,
# numeric-facet values are coerced to Integer, and `base` defaults to
# 'string'. Unrecognized keys are dropped.
#
# @param [String, Hash{Symbol => String}] value
# @return [Hash{Symbol => String}]
def normalize_datatype(value)
  value = {base: value} unless value.is_a?(Hash)
  # Build a fresh representation with symbol keys and transformed values.
  normalized = value.each_with_object({}) do |(prop, val), acc|
    case prop.to_sym
    when :base, :decimalChar, :format, :groupChar, :pattern
      acc[prop.to_sym] = val
    when :length, :minLength, :maxLength, :minimum, :maximum,
         :minInclusive, :maxInclusive, :minExclusive, :maxExclusive
      acc[prop.to_sym] = val.to_i
    end
  end
  normalized[:base] ||= 'string'
  normalized
end
|
1162
|
+
|
1163
|
+
##
# Normalize JSON-LD content of a common property.
#
# Strings become value objects tagged with the default language; value
# objects are validated; node objects are validated and recursively
# normalized (with @id resolved document-relative).
#
# @raise [Error] if an invalid JSON-LD dialect is detected
# @param [Symbol, String] property
# @param [String, Hash{String => Object}, Array<String, Hash{String => Object}>] value
# @return [String, Hash{String => Object}, Array<String, Hash{String => Object}>]
def normalize_jsonld(property, value)
  case value
  when Array
    value.map {|v| normalize_jsonld(property, v)}
  when String
    # Plain string: wrap as a value object, tagged with the default language.
    ev = {'@value' => value}
    ev['@language'] = context.default_language if context.default_language
    ev
  when Hash
    if value['@value']
      # Value object: may carry only @value/@type/@language, not both of
      # @type and @language, and both must be well-formed.
      if !(value.keys.sort - %w(@value @type @language)).empty?
        raise Error, "Value object may not contain keys other than @value, @type, or @language: #{value.to_json}"
      elsif (value.keys.sort & %w(@language @type)) == %w(@language @type)
        raise Error, "Value object may not contain both @type and @language: #{value.to_json}"
      elsif value['@language'] && !BCP47::Language.identify(value['@language'])
        raise Error, "Value object with @language must use valid language: #{value.to_json}"
      elsif value['@type'] && !context.expand_iri(value['@type'], vocab: true).absolute?
        raise Error, "Value object with @type must defined type: #{value.to_json}"
      end
      value
    else
      # Node object: validate keywords and recurse into other properties.
      nv = {}
      value.each do |k, v|
        case k
        when "@id"
          nv[k] = context.expand_iri(v, documentRelative: true).to_s
          raise Error, "Invalid use of explicit BNode on @id" if nv[k].start_with?('_:')
        when "@type"
          Array(v).each do |vv|
            # Validate that all type values transform to absolute IRIs
            resource = context.expand_iri(vv, vocab: true)
            raise Error, "Invalid type #{vv} in JSON-LD context" unless resource.uri? && resource.absolute?
          end
          nv[k] = v
        when /^(@|_:)/
          # Any other keyword or explicit blank-node key is rejected.
          raise Error, "Invalid use of #{k} in JSON-LD content"
        else
          nv[k] = normalize_jsonld(k, v)
        end
      end
      nv
    end
  else
    # Numbers, booleans, nil pass through unchanged.
    value
  end
end
|
1217
|
+
protected
|
1218
|
+
|
1219
|
+
# When setting a natural language property, always store the language-map
# form, keyed by the default language (or 'und' when none is configured).
# @param [Symbol] prop
# @param [Hash{String => String, Array<String>}, Array<String>, String] value
# @return [Hash{String => Array<String>}]
def set_nl(prop, value)
  object[prop] =
    case value
    when String, Array
      lang = context.default_language || 'und'
      {lang => Array(value)}
    else
      value
    end
end
|
1230
|
+
|
1231
|
+
# Look up an inherited property: a locally-set value wins (even if nil);
# otherwise the lookup is delegated to the parent metadata, if any.
def inherited_property_value(method)
  key = method.to_sym
  return object[key] if object.key?(key)
  parent.send(method) if parent
end
|
1237
|
+
|
1238
|
+
##
# Get the root metadata object by walking up the parent chain.
# @return [TableGroup, Table]
def root
  parent ? parent.root : self
end
|
1244
|
+
private
|
1245
|
+
# Options passed to CSV.new based on dialect.
# @return [Hash{Symbol => Object}] keyword options for ::CSV.new
def csv_options
  # Hoist the "self or dialect" lookup — it was previously repeated in
  # every entry of the hash.
  d = is_a?(Dialect) ? self : dialect
  {
    col_sep: d.delimiter,
    row_sep: d.lineTerminator,
    quote_char: d.quoteChar,
    encoding: d.encoding
  }
end
|
1254
|
+
|
1255
|
+
# Minimal carrier object giving class-level access to the Utils#debug
# instance helper.
class DebugContext
  include Utils

  def initialize(*args, &block)
    @options = {}
    debug(*args, &block)
  end
end

# Class-level debug logging; forwards to Utils#debug via a DebugContext.
def self.debug(*args, &block)
  DebugContext.new(*args, &block)
end
|
1265
|
+
end
|
1266
|
+
|
1267
|
+
# Description of a group of tables.
class TableGroup < Metadata
  PROPERTIES = {
    :@id => :link,
    :@type => :atomic,
    notes: :array,
    resources: :array,
    tableSchema: :object,
    tableDirection: :atomic,
    dialect: :object,
    transformations: :array,
  }.freeze
  REQUIRED = [].freeze

  # Setters (dialect has dedicated handling elsewhere)
  PROPERTIES.each do |a, type|
    next if a == :dialect
    define_method("#{a}=".to_sym) do |value|
      case type
      when :natural_language
        set_nl(a, value)
      else
        # FIX: anchor with \A..\z — the previous /^\d+/ matched a digit at
        # the start of ANY line, so e.g. "x\n1" was silently converted with
        # to_i (yielding 0). Only pure digit strings are now coerced.
        object[a] = value.to_s =~ /\A\d+\z/ ? value.to_i : value
      end
    end
  end

  # Does the Metadata or any descendant have any common properties
  # @return [Boolean]
  def has_annotations?
    super || resources.any? {|t| t.has_annotations? }
  end

  # Logic for accessing elements as accessors
  def method_missing(method, *args)
    if INHERITED_PROPERTIES.has_key?(method.to_sym)
      inherited_property_value(method.to_sym)
    else
      PROPERTIES.has_key?(method.to_sym) ? object[method.to_sym] : super
    end
  end

  # Keep respond_to? consistent with method_missing.
  def respond_to_missing?(method, include_private = false)
    INHERITED_PROPERTIES.has_key?(method.to_sym) ||
      PROPERTIES.has_key?(method.to_sym) ||
      super
  end

  ##
  # Iterate over all resources
  # @yield [Table]
  def each_resource
    resources.map(&:url).each do |url|
      yield for_table(url)
    end
  end

  ##
  # Return the metadata for a specific table, re-basing context as necessary
  #
  # @param [String] url of the table
  # @return [Table, nil] nil when no resource matches the url
  def for_table(url)
    if table = Array(resources).detect {|t| t.url == url}
      # Set document base for this table for resolving URLs
      table.instance_variable_set(:@context, context.dup)
      table.context.base = url
      table
    end
  end

  # Return Annotated Table Group representation
  def to_atd
    {
      "@id" => id,
      "@type" => "AnnotatedTableGroup",
      "resources" => resources.map(&:to_atd)
    }
  end
end
|
1342
|
+
|
1343
|
+
# Description of a single table.
class Table < Metadata
  PROPERTIES = {
    :@id => :link,
    :@type => :atomic,
    dialect: :object,
    notes: :array,
    suppressOutput: :atomic,
    tableDirection: :atomic,
    tableSchema: :object,
    transformations: :array,
    url: :link,
  }.freeze
  REQUIRED = [:url].freeze

  # Setters (dialect has dedicated handling elsewhere)
  PROPERTIES.each do |a, type|
    next if a == :dialect
    define_method("#{a}=".to_sym) do |value|
      case type
      when :natural_language
        set_nl(a, value)
      else
        # FIX: anchored \A..\z — /^\d+/ matched a digit at any line start,
        # coercing non-numeric multi-line strings via to_i.
        object[a] = value.to_s =~ /\A\d+\z/ ? value.to_i : value
      end
    end
  end

  # Does the Metadata or any descendant have any common properties
  # @return [Boolean]
  def has_annotations?
    super || tableSchema && tableSchema.has_annotations?
  end

  # Return Annotated Table representation
  def to_atd
    {
      "@id" => id,
      "@type" => "AnnotatedTable",
      "columns" => tableSchema.columns.map(&:to_atd),
      "rows" => [],
      "url" => self.url.to_s
    }
  end

  # Logic for accessing elements as accessors
  def method_missing(method, *args)
    if INHERITED_PROPERTIES.has_key?(method.to_sym)
      inherited_property_value(method.to_sym)
    else
      PROPERTIES.has_key?(method.to_sym) ? object[method.to_sym] : super
    end
  end

  # Keep respond_to? consistent with method_missing.
  def respond_to_missing?(method, include_private = false)
    INHERITED_PROPERTIES.has_key?(method.to_sym) ||
      PROPERTIES.has_key?(method.to_sym) ||
      super
  end
end
|
1396
|
+
|
1397
|
+
# Description of a transformation (template specification).
class Transformation < Metadata
  PROPERTIES = {
    :@id => :link,
    :@type => :atomic,
    source: :atomic,
    targetFormat: :link,
    scriptFormat: :link,
    title: :natural_language,
    url: :link,
  }.freeze
  REQUIRED = %w(url targetFormat scriptFormat).map(&:to_sym).freeze

  # Setters
  PROPERTIES.each do |a, type|
    define_method("#{a}=".to_sym) do |value|
      case type
      when :natural_language
        set_nl(a, value)
      else
        # FIX: anchored \A..\z — /^\d+/ matched a digit at any line start,
        # coercing non-numeric multi-line strings via to_i.
        object[a] = value.to_s =~ /\A\d+\z/ ? value.to_i : value
      end
    end
  end

  # Logic for accessing elements as accessors
  def method_missing(method, *args)
    PROPERTIES.has_key?(method.to_sym) ? object[method.to_sym] : super
  end

  # Keep respond_to? consistent with method_missing.
  def respond_to_missing?(method, include_private = false)
    PROPERTIES.has_key?(method.to_sym) || super
  end
end
|
1426
|
+
|
1427
|
+
# Description of a table schema.
class Schema < Metadata
  PROPERTIES = {
    :@id => :link,
    :@type => :atomic,
    columns: :array,
    foreignKeys: :array,
    primaryKey: :column_reference,
  }.freeze
  REQUIRED = [].freeze

  # Setters
  PROPERTIES.each do |a, type|
    define_method("#{a}=".to_sym) do |value|
      case type
      when :natural_language
        set_nl(a, value)
      else
        # FIX: anchored \A..\z — /^\d+/ matched a digit at any line start,
        # coercing non-numeric multi-line strings via to_i.
        object[a] = value.to_s =~ /\A\d+\z/ ? value.to_i : value
      end
    end
  end

  # Logic for accessing elements as accessors
  def method_missing(method, *args)
    if INHERITED_PROPERTIES.has_key?(method.to_sym)
      inherited_property_value(method.to_sym)
    else
      PROPERTIES.has_key?(method.to_sym) ? object[method.to_sym] : super
    end
  end

  # Keep respond_to? consistent with method_missing.
  def respond_to_missing?(method, include_private = false)
    INHERITED_PROPERTIES.has_key?(method.to_sym) ||
      PROPERTIES.has_key?(method.to_sym) ||
      super
  end
end
|
1458
|
+
|
1459
|
+
# Description of a single column.
class Column < Metadata
  PROPERTIES = {
    :@id => :link,
    :@type => :atomic,
    name: :atomic,
    suppressOutput: :atomic,
    title: :natural_language,
    required: :atomic,
    virtual: :atomic,
  }.freeze
  REQUIRED = [].freeze

  ##
  # Table containing this column (if any)
  # @return [Table]
  def table
    @options[:table]
  end

  # Column number set on initialization
  # @return [Integer] 1-based column number (0 when unset)
  def number
    @options.fetch(:number, 0)
  end

  # Source Column number set on initialization
  #
  # @note this is lazy evaluated to avoid dependencies on setting dialect vs. initializing columns
  # @return [Integer] 1-based column number in the source CSV
  def sourceNumber
    skipColumns = table ? (dialect.skipColumns.to_i + dialect.headerColumnCount.to_i) : 0
    number + skipColumns
  end

  # Does the Metadata or any descendant have any common properties
  # NOTE(review): `columns` is not a Column property and is not defined here,
  # so this call falls through method_missing to super — confirm whether this
  # was meant to be plain `super` (likely copied from Schema).
  # @return [Boolean]
  def has_annotations?
    super || columns.any? {|c| c.has_annotations? }
  end

  # Setters
  PROPERTIES.each do |a, type|
    define_method("#{a}=".to_sym) do |value|
      case type
      when :natural_language
        set_nl(a, value)
      else
        # FIX: anchored \A..\z — /^\d+/ matched a digit at any line start,
        # coercing non-numeric multi-line strings via to_i.
        object[a] = value.to_s =~ /\A\d+\z/ ? value.to_i : value
      end
    end
  end

  # Return or create a name for the column from title, if it exists
  def name
    object[:name] ||= if title && (ts = title[context.default_language || 'und'])
      n = Array(ts).first
      # NOTE(review): URI.encode/escape was removed in Ruby 3.0; this will
      # need a replacement (e.g. explicit percent-encoding) on modern Rubies.
      n0 = URI.encode(n[0,1], /[^a-zA-Z0-9]/)
      n1 = URI.encode(n[1..-1], /[^\w\.]/)
      "#{n0}#{n1}"
    end || "_col.#{number}"
  end

  # Identifier for this Column, as an RFC7111 fragment
  # @return [RDF::URI]
  def id
    url = table ? table.url : RDF::URI("")
    url + "#col=#{self.sourceNumber}"
  end

  # Return Annotated Column representation
  def to_atd
    {
      "@id" => id,
      "@type" => "Column",
      "table" => (table.id if table),
      "number" => self.number,
      "sourceNumber" => self.sourceNumber,
      "cells" => [],
      "virtual" => self.virtual,
      "name" => self.name,
      "title" => self.title
    }
  end

  # Logic for accessing elements as accessors
  def method_missing(method, *args)
    if INHERITED_PROPERTIES.has_key?(method.to_sym)
      inherited_property_value(method.to_sym)
    else
      PROPERTIES.has_key?(method.to_sym) ? object[method.to_sym] : super
    end
  end

  # Keep respond_to? consistent with method_missing.
  def respond_to_missing?(method, include_private = false)
    INHERITED_PROPERTIES.has_key?(method.to_sym) ||
      PROPERTIES.has_key?(method.to_sym) ||
      super
  end
end
|
1550
|
+
|
1551
|
+
##
# Dialect description: how to parse the physical CSV/TSV file
# (delimiters, quoting, header rows, trimming, ...).
class Dialect < Metadata
  # Defaults for dialects
  DIALECT_DEFAULTS = {
    commentPrefix: nil,
    delimiter: ",".freeze,
    doubleQuote: true,
    encoding: "utf-8".freeze,
    header: true,
    headerColumnCount: 0,
    headerRowCount: 1,
    lineTerminator: :auto, # SPEC says "\r\n"
    quoteChar: '"',
    skipBlankRows: false,
    skipColumns: 0,
    skipInitialSpace: false,
    skipRows: 0,
    trim: false
  }.freeze

  PROPERTIES = {
    :@id => :link,
    :@type => :atomic,
    commentPrefix: :atomic,
    delimiter: :atomic,
    doubleQuote: :atomic,
    encoding: :atomic,
    header: :atomic,
    headerColumnCount: :atomic,
    headerRowCount: :atomic,
    lineTerminator: :atomic,
    quoteChar: :atomic,
    skipBlankRows: :atomic,
    skipColumns: :atomic,
    skipInitialSpace: :atomic,
    skipRows: :atomic,
    trim: :atomic,
  }.freeze

  REQUIRED = [].freeze

  # Setters: stored directly; digit-leading strings are coerced to Integer.
  PROPERTIES.keys.each do |a|
    define_method("#{a}=".to_sym) do |value|
      object[a] = value.to_s =~ /^\d+/ ? value.to_i : value
    end
  end

  # escape character
  # @return [String]
  def escape_character
    self.doubleQuote ? '"' : '\\'
  end

  # default for headerRowCount is zero if header is false
  # @return [Integer]
  def headerRowCount
    object.fetch(:headerRowCount, self.header ? 1 : 0)
  end

  # default for trim comes from skipInitialSpace
  # @return [Boolean, String]
  def trim
    object.fetch(:trim, self.skipInitialSpace ? 'start' : false)
  end

  ##
  # Extract a new Metadata document from the file or data provided
  #
  # @param [#read, #to_s] input IO, or file path or URL
  # @param [Hash{Symbol => Object}] options
  #   any additional options (see `RDF::Util::File.open_file`)
  # @return [Metadata] Tabular metadata
  # @see http://w3c.github.io/csvw/syntax/#parsing
  def embedded_metadata(input, options = {})
    options = options.dup
    options.delete(:context) # Don't accidentally use a passed context
    # Normalize input to an IO object
    if !input.respond_to?(:read)
      return ::RDF::Util::File.open_file(input.to_s) {|f| embedded_metadata(f, options.merge(base: input.to_s))}
    end

    table = {
      "url" => (options.fetch(:base, "")),
      "@type" => "Table",
      "tableSchema" => {
        "@type" => "Schema",
        "columns" => []
      }
    }

    # Set encoding on input
    csv = ::CSV.new(input, csv_options)
    (1..skipRows.to_i).each do
      # Skip initial lines, these form comment annotations.
      # Array() guards against a nil row when the input has fewer rows
      # than skipRows (previously raised NoMethodError on nil.join).
      value = Array(csv.shift).join(delimiter)
      # Trim value
      value.lstrip! if %w(true start).include?(trim.to_s)
      value.rstrip! if %w(true end).include?(trim.to_s)

      value = value[1..-1].strip if commentPrefix && value.start_with?(commentPrefix)
      (table["rdfs:comment"] ||= []) << value unless value.empty?
    end
    # Comments are collected under "rdfs:comment"; the previous debug
    # statement inspected the never-written "notes" key and always
    # printed nil.
    debug("embedded_metadata") {"comments: #{table["rdfs:comment"].inspect}"}

    (1..headerRowCount).each do
      row_data = Array(csv.shift)
      Array(row_data).each_with_index do |value, index|
        # Skip columns
        skipCols = skipColumns.to_i + headerColumnCount.to_i
        next if index < skipCols

        # Trim value
        value.lstrip! if %w(true start).include?(trim.to_s)
        value.rstrip! if %w(true end).include?(trim.to_s)

        # Initialize title
        # SPEC CONFUSION: does title get an array, or concatenated values?
        columns = table["tableSchema"]["columns"] ||= []
        column = columns[index - skipCols] ||= {
          "title" => {"und" => []},
        }
        column["title"]["und"] << value
      end
    end
    debug("embedded_metadata") {"table: #{table.inspect}"}
    input.rewind if input.respond_to?(:rewind)

    Table.new(table, options.merge(reason: "load embedded metadata: #{table['@id']}"))
  end

  # Logic for accessing elements as accessors
  def method_missing(method, *args)
    if DIALECT_DEFAULTS.has_key?(method.to_sym)
      # As set, or with default
      object.fetch(method.to_sym, DIALECT_DEFAULTS[method.to_sym])
    else
      super
    end
  end
end
|
1690
|
+
|
1691
|
+
# Wraps each resulting row
|
1692
|
+
# Wraps each resulting row
class Row
  # Class for returning values
  #
  # Holds one cell's source string, its parsed value(s), accumulated
  # parse errors, and the aboutUrl/propertyUrl/valueUrl expanded from
  # the column's URI templates.
  Cell = Struct.new(:table, :column, :row, :stringValue, :aboutUrl, :propertyUrl, :valueUrl, :value, :errors) do
    # Expand the column's aboutUrl/propertyUrl/valueUrl URI templates
    # against the supplied variables and store the expanded URLs on self.
    #
    # @param [Hash{String => Object}] mapped_values template variables
    #   ("_row", "_sourceRow", "_name", "_column", "_sourceColumn",
    #   plus cell values keyed by column name)
    def set_urls(mapped_values)
      %w(aboutUrl propertyUrl valueUrl).each do |prop|
        # If the cell value is nil, and it is not a virtual column
        next if prop == "valueUrl" && value.nil? && !column.virtual
        if v = column.send(prop.to_sym)
          t = Addressable::Template.new(v)
          mapped = t.expand(mapped_values).to_s
          # FIXME: don't expand here, do it in CSV2RDF
          url = row.context.expand_iri(mapped, documentRelative: true)
          self.send("#{prop}=".to_sym, url)
        end
      end
    end

    # A cell is valid when no parse errors were recorded.
    def valid?; Array(errors).empty?; end
    def to_s; value.to_s; end

    # Identifier for this Cell, as an RFC7111 fragment
    # @return [RDF::URI]
    def id; table.url + "#cell=#{self.row.sourceNumber},#{self.column.sourceNumber}"; end

    # Return Annotated Cell representation
    def to_atd
      {
        "@id" => self.id,
        "@type" => "Cell",
        "column" => column.id,
        "row" => row.id,
        "stringValue" => self.stringValue,
        "value" => self.value,
        "errors" => self.errors
      }
    end
  end

  # Row values, hashed by `name`
  # NOTE(review): @values is populated as an Array of Cell — the
  # "hashed by name" wording appears stale; confirm against callers.
  attr_reader :values

  # Row number of this row
  # @return [Integer]
  attr_reader :number

  # Row number of this row from the original source
  # @return [Integer]
  attr_reader :sourceNumber

  #
  # Table containing this row
  # @return [Table]
  attr_reader :table

  #
  # Context from Table with base set to table URL for expanding URI Templates
  # @return [JSON::LD::Context]
  attr_reader :context

  ##
  # Parse one data row: pad for virtual columns, normalize and
  # datatype-check each cell, then expand each cell's URI templates.
  #
  # @param [Array<Array<String>>] row
  # @param [Metadata] metadata for Table
  # @param [Integer] number 1-based row number after skipped/header rows
  # @param [Integer] source_number 1-based row number from source
  # @return [Row]
  def initialize(row, metadata, number, source_number)
    @table = metadata
    @number = number
    @sourceNumber = source_number
    @values = []
    # Leading source columns that carry no data (skipped + row headers).
    skipColumns = metadata.dialect.skipColumns.to_i + metadata.dialect.headerColumnCount.to_i

    # Template expansion resolves relative to the table URL.
    @context = table.context.dup
    @context.base = table.url

    # Create values hash
    # SPEC CONFUSION: are values pre-or-post conversion?
    map_values = {"_row" => number, "_sourceRow" => source_number}

    columns = metadata.tableSchema.columns ||= []

    # Make sure that the row length is at least as long as the number of column definitions, to implicitly include virtual columns
    # NOTE(review): padding indexes `row` by schema-column position while
    # data cells start at `skipColumns` — looks offset by skipColumns when
    # columns are skipped; confirm intended.
    columns.each_with_index {|c, index| row[index] ||= (c.null || '')}
    row.each_with_index do |value, index|

      next if index < skipColumns

      cell_errors = []

      # create column if necessary
      columns[index - skipColumns] ||=
        Column.new({}, table: metadata, parent: metadata.tableSchema, number: index + 1 - skipColumns)

      column = columns[index - skipColumns]

      # Cell is registered before parsing; value/errors are filled below.
      @values << cell = Cell.new(metadata, column, self, value)

      datatype = metadata.normalize_datatype(column.datatype || 'string')
      # NOTE(review): /\r\t\a/ matches the literal three-character
      # sequence CR TAB BEL, not any one of them — likely intended as a
      # character class /[\r\t\a]/; confirm before changing.
      value = value.gsub(/\r\t\a/, ' ') unless %w(string json xml html anyAtomicType any).include?(datatype[:base])
      value = value.strip.gsub(/\s+/, ' ') unless %w(string json xml html anyAtomicType any normalizedString).include?(datatype[:base])
      # if the resulting string is an empty string, apply the remaining steps to the string given by the default property
      value = column.default || '' if value.empty?

      # A separator makes this a list-valued cell.
      cell_values = column.separator ? value.split(column.separator) : [value]

      cell_values = cell_values.map do |v|
        v = v.strip unless %w(string anyAtomicType any).include?(datatype[:base])
        v = column.default || '' if v.empty?
        if Array(column.null).include?(v)
          # Values matching the column's null token(s) become nil and are
          # later removed by .compact.
          nil
        else
          # Trim value
          if %w(string anyAtomicType any).include?(datatype[:base])
            v.lstrip! if %w(true start).include?(metadata.dialect.trim.to_s)
            v.rstrip! if %w(true end).include?(metadata.dialect.trim.to_s)
          else
            # unless the datatype is string or anyAtomicType or any, strip leading and trailing whitespace from the string value
            v.strip!
          end

          expanded_dt = metadata.context.expand_iri(datatype[:base], vocab: true)
          # NOTE(review): value_matching_datatype can return nil (e.g. an
          # unparseable boolean without a format) — `cell_errors += nil`
          # would then raise TypeError; confirm reachable.
          if (lit_or_errors = value_matching_datatype(v.dup, datatype, expanded_dt, column.lang)).is_a?(RDF::Literal)
            lit_or_errors
          else
            cell_errors += lit_or_errors
            RDF::Literal(v, language: column.lang)
          end
        end
      end.compact

      cell.value = (column.separator ? cell_values : cell_values.first)
      cell.errors = cell_errors
      metadata.send(:debug, "#{self.number}: each_cell ##{self.sourceNumber},#{cell.column.sourceNumber}", cell.errors.join("\n")) unless cell_errors.empty?

      # Record this cell's string value(s) for URI-template expansion.
      map_values[columns[index - skipColumns].name] = (column.separator ? cell_values.map(&:to_s) : cell_values.first.to_s)
    end

    # Map URLs for row
    @values.each_with_index do |cell, index|
      mapped_values = map_values.merge(
        "_name" => URI.decode(cell.column.name),
        "_column" => cell.column.number,
        "_sourceColumn" => cell.column.sourceNumber
      )
      cell.set_urls(mapped_values)
    end
  end

  # Identifier for this row, as an RFC7111 fragment
  # @return [RDF::URI]
  def id; table.url + "#row=#{self.sourceNumber}"; end

  # Return Annotated Row representation
  def to_atd
    {
      "@id" => self.id,
      "@type" => "Row",
      "table" => table.id,
      "number" => self.number,
      "sourceNumber" => self.sourceNumber,
      "cells" => @values.map(&:to_atd)
    }
  end

  private
  #
  # given a datatype specification, return a literal matching that specification, if found, otherwise nil
  #
  # Checks length facets, then performs per-datatype normalization
  # (numeric group/decimal chars, percent/permille, boolean formats,
  # date/time format patterns) before constructing an RDF::Literal.
  #
  # @param [String] value the (already trimmed) cell string
  # @param [Hash] datatype normalized datatype description
  # @param [RDF::URI] expanded_dt expanded datatype IRI
  # @param [String, nil] language column language for string literals
  # @return [RDF::Literal, Array<String>] literal on success, else errors
  def value_matching_datatype(value, datatype, expanded_dt, language)
    value_errors = []

    # Check constraints
    if datatype[:length] && value.length != datatype[:length]
      value_errors << "#{value} does not have length #{datatype[:length]}"
    end
    if datatype[:minLength] && value.length < datatype[:minLength]
      value_errors << "#{value} does not have length >= #{datatype[:minLength]}"
    end
    if datatype[:maxLength] && value.length > datatype[:maxLength]
      value_errors << "#{value} does not have length <= #{datatype[:maxLength]}"
    end

    format = datatype[:format]
    # Datatype specific constraints and conversions
    case datatype[:base].to_sym
    when :decimal, :integer, :long, :int, :short, :byte,
         :nonNegativeInteger, :positiveInteger,
         :unsignedLong, :unsignedInt, :unsignedShort, :unsignedByte,
         :nonPositiveInteger, :negativeInteger,
         :double, :float, :number
      # Normalize representation based on numeric-specific facets
      groupChar = datatype.fetch(:groupChar, ',')
      if datatype[:pattern] && !value.match(Regexp.new(datatype[:pattern]))
        # pattern facet failed
        value_errors << "#{value} does not match pattern #{datatype[:pattern]}"
      end
      if value.include?(groupChar*2)
        # pattern facet failed
        value_errors << "#{value} has repeating #{groupChar.inspect}"
      end
      # Strip grouping characters and canonicalize the decimal separator.
      value.gsub!(groupChar, '')
      value.sub!(datatype.fetch(:decimalChar, '.'), '.')

      # Extract percent or per-mille sign
      percent = permille = false
      case value
      when /%$/
        value = value[0..-2]
        percent = true
      when /‰$/
        value = value[0..-2]
        permille = true
      end

      lit = RDF::Literal(value, datatype: expanded_dt)
      if percent || permille
        # Scale the numeric object down and rebuild the literal.
        o = lit.object
        o = o / 100 if percent
        o = o / 1000 if permille
        lit = RDF::Literal(o, datatype: expanded_dt)
      end
    when :boolean
      lit = if format
        # True/False determined by Y|N values
        t, f = format.to_s.split('|', 2)
        case
        when value == t
          value = RDF::Literal::TRUE
        when value == f
          value = RDF::Literal::FALSE
        else
          value_errors << "#{value} does not match boolean format #{format}"
          RDF::Literal::Boolean.new(value)
        end
      else
        # NOTE(review): neither branch matching yields nil here, which
        # propagates to the method's return value; see caller note.
        if %w(1 true).include?(value.downcase)
          RDF::Literal::TRUE
        elsif %w(0 false).include?(value.downcase)
          RDF::Literal::FALSE
        end
      end
    when :date, :time, :dateTime, :dateTimeStamp, :datetime
      # Match values
      tz, date_format, time_format = nil, nil, nil

      # Extract tz info
      if format && (md = format.match(/^(.*[dyms])+(\s*[xX]{1,5})$/))
        format, tz = md[1], md[2]
      end

      if format
        date_format, time_format = format.split(' ')
        if datatype[:base].to_sym == :time
          # A bare :time format is a time pattern, not a date pattern.
          date_format, time_format = nil, date_format
        end

        # Extract date, of specified
        date_part = case date_format
        when 'yyyy-MM-dd' then value.match(/^(?<yr>\d{4})-(?<mo>\d{2})-(?<da>\d{2})/)
        when 'yyyyMMdd' then value.match(/^(?<yr>\d{4})(?<mo>\d{2})(?<da>\d{2})/)
        when 'dd-MM-yyyy' then value.match(/^(?<da>\d{2})-(?<mo>\d{2})-(?<yr>\d{4})/)
        when 'd-M-yyyy' then value.match(/^(?<da>\d{1,2})-(?<mo>\d{1,2})-(?<yr>\d{4})/)
        when 'MM-dd-yyyy' then value.match(/^(?<mo>\d{2})-(?<da>\d{2})-(?<yr>\d{4})/)
        when 'M-d-yyyy' then value.match(/^(?<mo>\d{1,2})-(?<da>\d{1,2})-(?<yr>\d{4})/)
        when 'dd/MM/yyyy' then value.match(/^(?<da>\d{2})\/(?<mo>\d{2})\/(?<yr>\d{4})/)
        when 'd/M/yyyy' then value.match(/^(?<da>\d{1,2})\/(?<mo>\d{1,2})\/(?<yr>\d{4})/)
        when 'MM/dd/yyyy' then value.match(/^(?<mo>\d{2})\/(?<da>\d{2})\/(?<yr>\d{4})/)
        when 'M/d/yyyy' then value.match(/^(?<mo>\d{1,2})\/(?<da>\d{1,2})\/(?<yr>\d{4})/)
        when 'dd.MM.yyyy' then value.match(/^(?<da>\d{2})\.(?<mo>\d{2})\.(?<yr>\d{4})/)
        when 'd.M.yyyy' then value.match(/^(?<da>\d{1,2})\.(?<mo>\d{1,2})\.(?<yr>\d{4})/)
        when 'MM.dd.yyyy' then value.match(/^(?<mo>\d{2})\.(?<da>\d{2})\.(?<yr>\d{4})/)
        when 'M.d.yyyy' then value.match(/^(?<mo>\d{1,2})\.(?<da>\d{1,2})\.(?<yr>\d{4})/)
        when 'yyyy-MM-ddTHH:mm:ss' then value.match(/^(?<yr>\d{4})-(?<mo>\d{2})-(?<da>\d{2})T(?<hr>\d{2}):(?<mi>\d{2}):(?<se>\d{2})/)
        when 'yyyy-MM-ddTHH:mm' then value.match(/^(?<yr>\d{4})-(?<mo>\d{2})-(?<da>\d{2})T(?<hr>\d{2}):(?<mi>\d{2})(?<se>)/)
        else
          value_errors << "unrecognized date/time format #{date_format}" if date_format
          nil
        end

        # Forward past date part
        if date_part
          value = value[date_part.to_s.length..-1]
          value = value.lstrip if date_part && value.start_with?(' ')
        end

        # Extract time, of specified
        time_part = case time_format
        when 'HH:mm:ss' then value.match(/^(?<hr>\d{2}):(?<mi>\d{2}):(?<se>\d{2})/)
        when 'HHmmss' then value.match(/^(?<hr>\d{2})(?<mi>\d{2})(?<se>\d{2})/)
        when 'HH:mm' then value.match(/^(?<hr>\d{2}):(?<mi>\d{2})(?<se>)/)
        when 'HHmm' then value.match(/^(?<hr>\d{2})(?<mi>\d{2})(?<se>)/)
        else
          value_errors << "unrecognized date/time format #{time_format}" if time_format
          nil
        end

        # Forward past time part
        value = value[time_part.to_s.length..-1] if time_part

        # Use datetime match for time
        time_part = date_part if date_part && date_part.names.include?("hr")

        # If there's a timezone, it may optionally start with whitespace
        value = value.lstrip if tz.to_s.start_with?(' ')
        tz_part = value if tz

        # Compose normalized value
        vd = ("%04d-%02d-%02d" % [date_part[:yr], date_part[:mo], date_part[:da]]) if date_part
        vt = ("%02d:%02d:%02d" % [time_part[:hr], time_part[:mi], time_part[:se].to_i]) if time_part
        value = [vd, vt].compact.join('T')
        value += tz_part.to_s
      end

      lit = RDF::Literal(value, datatype: expanded_dt)
    when :duration, :dayTimeDuration, :yearMonthDuration
      # SPEC CONFUSION: surely format also includes that for other duration types?
      lit = RDF::Literal(value, datatype: expanded_dt)
    when :anyType, :anySimpleType, :ENTITIES, :IDREFS, :NMTOKENS,
         :ENTITY, :ID, :IDREF, :NOTATION
      value_errors << "#{value} uses unsupported datatype: #{datatype[:base]}"
    else
      # For other types, format is a regexp
      unless format.nil? || value.match(Regexp.new(format))
        value_errors << "#{value} does not match format #{format}"
      end
      lit = if value_errors.empty?
        if expanded_dt == RDF::XSD.string
          # Type string will still use language
          RDF::Literal(value, language: language)
        else
          RDF::Literal(value, datatype: expanded_dt)
        end
      end
    end

    # Final value is a valid literal, or a plain literal otherwise
    value_errors << "#{value} is not a valid #{datatype[:base]}" if lit && !lit.valid?

    # FIXME Value constraints

    value_errors.empty? ? lit : value_errors
  end
end
|
2035
|
+
|
2036
|
+
# Metadata errors detected
|
2037
|
+
# Raised when a metadata error is detected.
class Error < StandardError
end
|
2038
|
+
end
|