rdf-tabular 0.1.3.1 → 0.2.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +4 -3
- data/VERSION +1 -1
- data/etc/README +4 -0
- data/etc/csvw.jsonld +1 -1
- data/etc/earl.html +10544 -0
- data/etc/earl.jsonld +17068 -0
- data/etc/earl.ttl +7114 -0
- data/etc/template.haml +205 -0
- data/etc/well-known +4 -0
- data/lib/rdf/tabular.rb +10 -4
- data/lib/rdf/tabular/format.rb +11 -7
- data/lib/rdf/tabular/metadata.rb +761 -314
- data/lib/rdf/tabular/reader.rb +276 -144
- data/spec/format_spec.rb +11 -8
- data/spec/matchers.rb +4 -4
- data/spec/metadata_spec.rb +120 -36
- data/spec/reader_spec.rb +56 -18
- data/spec/spec_helper.rb +10 -2
- data/spec/suite_helper.rb +35 -18
- data/spec/suite_spec.rb +26 -24
- metadata +15 -3
data/lib/rdf/tabular/reader.rb
CHANGED
@@ -19,6 +19,16 @@ module RDF::Tabular
|
|
19
19
|
# @return [:read]
|
20
20
|
attr_reader :input
|
21
21
|
|
22
|
+
##
|
23
|
+
# Warnings found during processing
|
24
|
+
# @return [Array<String>]
|
25
|
+
attr_reader :warnings
|
26
|
+
|
27
|
+
##
|
28
|
+
# Accumulated errors found during processing
|
29
|
+
# @return [Array<String>]
|
30
|
+
attr_reader :errors
|
31
|
+
|
22
32
|
##
|
23
33
|
# Initializes the RDF::Tabular Reader instance.
|
24
34
|
#
|
@@ -32,6 +42,8 @@ module RDF::Tabular
|
|
32
42
|
# @option options [Boolean] :noProv do not output optional provenance information
|
33
43
|
# @option options [Array] :warnings
|
34
44
|
# array for placing warnings found when processing metadata. If not set, and validating, warnings are output to `$stderr`
|
45
|
+
# @option options [Array<Hash>] :fks_referencing_table
|
46
|
+
# When called with Table metadata, a list of the foreign keys referencing this table
|
35
47
|
# @yield [reader] `self`
|
36
48
|
# @yieldparam [RDF::Reader] reader
|
37
49
|
# @yieldreturn [void] ignored
|
@@ -48,13 +60,14 @@ module RDF::Tabular
|
|
48
60
|
end
|
49
61
|
|
50
62
|
@options[:depth] ||= 0
|
63
|
+
@errors = @options.fetch(:errors, [])
|
64
|
+
@warnings = @options.fetch(:warnings, [])
|
51
65
|
|
52
66
|
debug("Reader#initialize") {"input: #{input.inspect}, base: #{@options[:base]}"}
|
53
67
|
|
54
68
|
# Minimal implies noProv
|
55
69
|
@options[:noProv] ||= @options[:minimal]
|
56
70
|
|
57
|
-
#byebug if input.is_a?(Array)
|
58
71
|
@input = case input
|
59
72
|
when String then StringIO.new(input)
|
60
73
|
when Array then StringIO.new(input.map {|r| r.join(",")}.join("\n"))
|
@@ -63,8 +76,8 @@ module RDF::Tabular
|
|
63
76
|
|
64
77
|
depth do
|
65
78
|
# If input is JSON, then the input is the metadata
|
66
|
-
|
67
|
-
|
79
|
+
content_type = @input.respond_to?(:content_type) ? @input.content_type : ""
|
80
|
+
if @options[:base] =~ /\.json(?:ld)?$/ || content_type =~ %r(application/(csvm\+|ld\+)?json)
|
68
81
|
@metadata = Metadata.new(@input, @options.merge(filenames: @options[:base]))
|
69
82
|
# If @metadata is for a Table, turn it into a TableGroup
|
70
83
|
@metadata = @metadata.to_table_group if @metadata.is_a?(Table)
|
@@ -79,7 +92,8 @@ module RDF::Tabular
|
|
79
92
|
dialect.header = false if (input.headers.fetch(:content_type, '').split(';').include?('header=absent') rescue false)
|
80
93
|
dialect.encoding = input.charset if (input.charset rescue nil)
|
81
94
|
dialect.separator = "\t" if (input.content_type == "text/tsv" rescue nil)
|
82
|
-
embed_options =
|
95
|
+
embed_options = @options.dup
|
96
|
+
embed_options[:lang] = dialect_metadata.lang if dialect_metadata.lang
|
83
97
|
embedded_metadata = dialect.embedded_metadata(input, @options[:metadata], embed_options)
|
84
98
|
|
85
99
|
if (@metadata = @options[:metadata]) && @metadata.tableSchema
|
@@ -96,7 +110,7 @@ module RDF::Tabular
|
|
96
110
|
@metadata.dialect = dialect
|
97
111
|
else
|
98
112
|
# It's tabular data. Find metadata and proceed as if it was specified in the first place
|
99
|
-
@options[:original_input] = @input
|
113
|
+
@options[:original_input] = @input unless @options[:metadata]
|
100
114
|
@input = @metadata = Metadata.for_input(@input, @options).normalize!
|
101
115
|
end
|
102
116
|
|
@@ -126,93 +140,102 @@ module RDF::Tabular
|
|
126
140
|
debug("each_statement: metadata") {input.inspect}
|
127
141
|
|
128
142
|
depth do
|
129
|
-
|
130
|
-
|
131
|
-
|
132
|
-
|
133
|
-
|
134
|
-
|
135
|
-
|
136
|
-
|
137
|
-
|
138
|
-
|
139
|
-
|
140
|
-
|
141
|
-
|
142
|
-
|
143
|
-
|
144
|
-
add_statement(0, statement)
|
145
|
-
end
|
146
|
-
end unless minimal?
|
143
|
+
begin
|
144
|
+
# Validate metadata
|
145
|
+
input.validate!
|
146
|
+
|
147
|
+
# Use resolved @id of TableGroup, if available
|
148
|
+
table_group = input.id || RDF::Node.new
|
149
|
+
add_statement(0, table_group, RDF.type, CSVW.TableGroup) unless minimal?
|
150
|
+
|
151
|
+
# Common Properties
|
152
|
+
input.each do |key, value|
|
153
|
+
next unless key.to_s.include?(':') || key == :notes
|
154
|
+
input.common_properties(table_group, key, value) do |statement|
|
155
|
+
add_statement(0, statement)
|
156
|
+
end
|
157
|
+
end unless minimal?
|
147
158
|
|
148
|
-
|
149
|
-
|
150
|
-
|
159
|
+
# If we were originally given tabular data as input, simply use that, rather than opening the table URL. This allows buffered data to be used as input.
|
160
|
+
# This case also handles found metadata that doesn't describe the input file
|
161
|
+
if options[:original_input] && !input.describes_file?(options[:base_uri])
|
162
|
+
table_resource = RDF::Node.new
|
163
|
+
add_statement(0, table_group, CSVW.table, table_resource) unless minimal?
|
164
|
+
Reader.new(options[:original_input], options.merge(
|
165
|
+
metadata: input.tables.first,
|
166
|
+
base: input.tables.first.url,
|
167
|
+
no_found_metadata: true,
|
168
|
+
table_resource: table_resource,
|
169
|
+
warnings: @warnings,
|
170
|
+
errors: @errors,
|
171
|
+
)) do |r|
|
172
|
+
r.each_statement(&block)
|
173
|
+
end
|
174
|
+
else
|
175
|
+
input.each_table do |table|
|
176
|
+
# If validating, continue on to process value restrictions
|
177
|
+
next if table.suppressOutput && !validate?
|
178
|
+
|
179
|
+
# Foreign Keys referencing this table
|
180
|
+
fks = input.tables.map do |t|
|
181
|
+
t.tableSchema && t.tableSchema.foreign_keys_referencing(table)
|
182
|
+
end.flatten.compact
|
183
|
+
table_resource = table.id || RDF::Node.new
|
151
184
|
add_statement(0, table_group, CSVW.table, table_resource) unless minimal?
|
152
|
-
Reader.
|
185
|
+
Reader.open(table.url, options.merge(
|
186
|
+
metadata: table,
|
187
|
+
base: table.url,
|
153
188
|
no_found_metadata: true,
|
154
|
-
table_resource: table_resource
|
189
|
+
table_resource: table_resource,
|
190
|
+
fks_referencing_table: fks,
|
191
|
+
warnings: @warnings,
|
192
|
+
errors: @errors,
|
155
193
|
)) do |r|
|
156
194
|
r.each_statement(&block)
|
157
195
|
end
|
158
|
-
else
|
159
|
-
input.each_table do |table|
|
160
|
-
next if table.suppressOutput
|
161
|
-
table_resource = table.id || RDF::Node.new
|
162
|
-
add_statement(0, table_group, CSVW.table, table_resource) unless minimal?
|
163
|
-
Reader.open(table.url, options.merge(
|
164
|
-
format: :tabular,
|
165
|
-
metadata: table,
|
166
|
-
base: table.url,
|
167
|
-
no_found_metadata: true,
|
168
|
-
table_resource: table_resource
|
169
|
-
)) do |r|
|
170
|
-
r.each_statement(&block)
|
171
|
-
end
|
172
|
-
end
|
173
196
|
end
|
174
197
|
|
175
|
-
#
|
176
|
-
if
|
177
|
-
|
178
|
-
add_statement(0, table_group, RDF::PROV.wasGeneratedBy, activity)
|
179
|
-
add_statement(0, activity, RDF.type, RDF::PROV.Activity)
|
180
|
-
add_statement(0, activity, RDF::PROV.wasAssociatedWith, RDF::URI("http://rubygems.org/gems/rdf-tabular"))
|
181
|
-
add_statement(0, activity, RDF::PROV.startedAtTime, RDF::Literal::DateTime.new(start_time))
|
182
|
-
add_statement(0, activity, RDF::PROV.endedAtTime, RDF::Literal::DateTime.new(Time.now))
|
183
|
-
|
184
|
-
unless (urls = input.tables.map(&:url)).empty?
|
185
|
-
usage = RDF::Node.new
|
186
|
-
add_statement(0, activity, RDF::PROV.qualifiedUsage, usage)
|
187
|
-
add_statement(0, usage, RDF.type, RDF::PROV.Usage)
|
188
|
-
urls.each do |url|
|
189
|
-
add_statement(0, usage, RDF::PROV.entity, RDF::URI(url))
|
190
|
-
end
|
191
|
-
add_statement(0, usage, RDF::PROV.hadRole, CSVW.csvEncodedTabularData)
|
192
|
-
end
|
198
|
+
# Lastly, if validating, validate foreign key integrity
|
199
|
+
validate_foreign_keys(input) if validate?
|
200
|
+
end
|
193
201
|
|
194
|
-
|
195
|
-
|
196
|
-
|
197
|
-
|
198
|
-
|
199
|
-
|
200
|
-
|
201
|
-
|
202
|
+
# Provenance
|
203
|
+
if prov?
|
204
|
+
activity = RDF::Node.new
|
205
|
+
add_statement(0, table_group, RDF::PROV.wasGeneratedBy, activity)
|
206
|
+
add_statement(0, activity, RDF.type, RDF::PROV.Activity)
|
207
|
+
add_statement(0, activity, RDF::PROV.wasAssociatedWith, RDF::URI("http://rubygems.org/gems/rdf-tabular"))
|
208
|
+
add_statement(0, activity, RDF::PROV.startedAtTime, RDF::Literal::DateTime.new(start_time))
|
209
|
+
add_statement(0, activity, RDF::PROV.endedAtTime, RDF::Literal::DateTime.new(Time.now))
|
210
|
+
|
211
|
+
unless (urls = input.tables.map(&:url)).empty?
|
212
|
+
usage = RDF::Node.new
|
213
|
+
add_statement(0, activity, RDF::PROV.qualifiedUsage, usage)
|
214
|
+
add_statement(0, usage, RDF.type, RDF::PROV.Usage)
|
215
|
+
urls.each do |url|
|
216
|
+
add_statement(0, usage, RDF::PROV.entity, RDF::URI(url))
|
202
217
|
end
|
218
|
+
add_statement(0, usage, RDF::PROV.hadRole, CSVW.csvEncodedTabularData)
|
203
219
|
end
|
204
|
-
|
205
|
-
|
206
|
-
|
207
|
-
|
220
|
+
|
221
|
+
unless Array(input.filenames).empty?
|
222
|
+
usage = RDF::Node.new
|
223
|
+
add_statement(0, activity, RDF::PROV.qualifiedUsage, usage)
|
224
|
+
add_statement(0, usage, RDF.type, RDF::PROV.Usage)
|
225
|
+
Array(input.filenames).each do |fn|
|
226
|
+
add_statement(0, usage, RDF::PROV.entity, RDF::URI(fn))
|
227
|
+
end
|
228
|
+
add_statement(0, usage, RDF::PROV.hadRole, CSVW.tabularMetadata)
|
208
229
|
end
|
209
230
|
end
|
210
|
-
|
211
|
-
|
212
|
-
|
231
|
+
ensure
|
232
|
+
warnings = @warnings.concat(input.warnings)
|
233
|
+
if validate? && !warnings.empty? && !@options[:warnings]
|
234
|
+
$stderr.puts "Warnings: #{warnings.join("\n")}"
|
235
|
+
end
|
236
|
+
if validate? && !errors.empty? && !@options[:errors]
|
237
|
+
$stderr.puts "Errors: #{errors.join("\n")}"
|
213
238
|
end
|
214
|
-
else
|
215
|
-
raise "Opened inappropriate metadata type: #{input.type}"
|
216
239
|
end
|
217
240
|
end
|
218
241
|
return
|
@@ -220,7 +243,7 @@ module RDF::Tabular
|
|
220
243
|
|
221
244
|
# Output Table-Level RDF triples
|
222
245
|
table_resource = options.fetch(:table_resource, (metadata.id || RDF::Node.new))
|
223
|
-
unless minimal?
|
246
|
+
unless minimal? || metadata.suppressOutput
|
224
247
|
add_statement(0, table_resource, RDF.type, CSVW.Table)
|
225
248
|
add_statement(0, table_resource, CSVW.url, RDF::URI(metadata.url))
|
226
249
|
end
|
@@ -228,15 +251,24 @@ module RDF::Tabular
|
|
228
251
|
# Input is file containing CSV data.
|
229
252
|
# Output ROW-Level statements
|
230
253
|
last_row_num = 0
|
254
|
+
primary_keys = []
|
231
255
|
metadata.each_row(input) do |row|
|
232
256
|
if row.is_a?(RDF::Statement)
|
233
257
|
# May add additional comments
|
234
258
|
row.subject = table_resource
|
235
|
-
add_statement(last_row_num + 1, row)
|
259
|
+
add_statement(last_row_num + 1, row) unless metadata.suppressOutput
|
236
260
|
next
|
237
261
|
end
|
238
262
|
last_row_num = row.sourceNumber
|
239
263
|
|
264
|
+
# Collect primary and foreign keys if validating
|
265
|
+
if validate?
|
266
|
+
primary_keys << row.primaryKey
|
267
|
+
collect_foreign_key_references(metadata, options[:fks_referencing_table], row)
|
268
|
+
end
|
269
|
+
|
270
|
+
next if metadata.suppressOutput
|
271
|
+
|
240
272
|
# Output row-level metadata
|
241
273
|
row_resource = RDF::Node.new
|
242
274
|
default_cell_subject = RDF::Node.new
|
@@ -245,8 +277,14 @@ module RDF::Tabular
|
|
245
277
|
add_statement(row.sourceNumber, row_resource, CSVW.rownum, row.number)
|
246
278
|
add_statement(row.sourceNumber, row_resource, RDF.type, CSVW.Row)
|
247
279
|
add_statement(row.sourceNumber, row_resource, CSVW.url, row.id)
|
280
|
+
row.titles.each do |t|
|
281
|
+
add_statement(row.sourceNumber, row_resource, CSVW.title, t)
|
282
|
+
end
|
248
283
|
end
|
249
284
|
row.values.each_with_index do |cell, index|
|
285
|
+
# Collect cell errors
|
286
|
+
(validate? ? errors : warnings) << "Table #{metadata.url} row #{row.number}(src #{row.sourceNumber}, col #{cell.column.sourceNumber}): " +
|
287
|
+
cell.errors.join("\n") unless Array(cell.errors).empty?
|
250
288
|
next if cell.column.suppressOutput # Skip ignored cells
|
251
289
|
cell_subject = cell.aboutUrl || default_cell_subject
|
252
290
|
propertyUrl = cell.propertyUrl || RDF::URI("#{metadata.url}##{cell.column.name}")
|
@@ -269,6 +307,9 @@ module RDF::Tabular
|
|
269
307
|
end
|
270
308
|
end
|
271
309
|
|
310
|
+
# Validate primary keys
|
311
|
+
validate_primary_keys(metadata, primary_keys) if validate?
|
312
|
+
|
272
313
|
# Common Properties
|
273
314
|
metadata.each do |key, value|
|
274
315
|
next unless key.to_s.include?(':') || key == :notes
|
@@ -278,6 +319,8 @@ module RDF::Tabular
|
|
278
319
|
end unless minimal?
|
279
320
|
end
|
280
321
|
enum_for(:each_statement)
|
322
|
+
rescue IOError => e
|
323
|
+
raise RDF::ReaderError, e.message, e.backtrace
|
281
324
|
end
|
282
325
|
|
283
326
|
##
|
@@ -292,6 +335,19 @@ module RDF::Tabular
|
|
292
335
|
enum_for(:each_triple)
|
293
336
|
end
|
294
337
|
|
338
|
+
##
# Validate and raise an exception if any errors are found while processing either metadata or tables.
#
# Runs `each_statement` with an empty block so that every row is read, then
# raises if anything has accumulated in `errors`.
#
# @return [self]
# @raise [Error] if `errors` is non-empty after reading, or if reading raised an RDF::ReaderError
def validate!
  each_statement {} # Read all rows
  raise Error, errors.join("\n") unless errors.empty?
  self
rescue RDF::ReaderError => e
  # Report reader failures through the same exception class as validation errors
  raise Error, e.message
end
|
350
|
+
|
295
351
|
##
|
296
352
|
# Transform to JSON. Note that this must be run from within the reader context if the input is an open IO stream.
|
297
353
|
#
|
@@ -317,6 +373,7 @@ module RDF::Tabular
|
|
317
373
|
# @option options [::JSON::State] :state used when dumping
|
318
374
|
# @option options [Boolean] :atd output Abstract Table representation instead
|
319
375
|
# @return [String]
|
376
|
+
# @raise [RDF::Tabular::Error]
|
320
377
|
def to_json(options = @options)
|
321
378
|
io = case options
|
322
379
|
when IO, StringIO then options
|
@@ -345,6 +402,8 @@ module RDF::Tabular
|
|
345
402
|
hash = self.send(hash_fn, options)
|
346
403
|
::JSON.generate(hash, json_state)
|
347
404
|
end
|
405
|
+
rescue IOError => e
|
406
|
+
raise RDF::Tabular::Error, e.message
|
348
407
|
end
|
349
408
|
|
350
409
|
##
|
@@ -360,77 +419,69 @@ module RDF::Tabular
|
|
360
419
|
debug("each_statement: metadata") {input.inspect}
|
361
420
|
depth do
|
362
421
|
# Get Metadata to invoke and open referenced files
|
363
|
-
|
364
|
-
|
365
|
-
|
366
|
-
|
367
|
-
|
422
|
+
begin
|
423
|
+
# Validate metadata
|
424
|
+
input.validate!
|
425
|
+
|
426
|
+
tables = []
|
427
|
+
table_group = {}
|
428
|
+
table_group['@id'] = input.id.to_s if input.id
|
429
|
+
|
430
|
+
# Common Properties
|
431
|
+
input.each do |key, value|
|
432
|
+
next unless key.to_s.include?(':') || key == :notes
|
433
|
+
table_group[key] = input.common_properties(nil, key, value)
|
434
|
+
table_group[key] = [table_group[key]] if key == :notes && !table_group[key].is_a?(Array)
|
435
|
+
end
|
368
436
|
|
369
|
-
|
370
|
-
table_group = {}
|
371
|
-
table_group['@id'] = input.id.to_s if input.id
|
437
|
+
table_group['tables'] = tables
|
372
438
|
|
373
|
-
|
374
|
-
|
375
|
-
|
376
|
-
|
377
|
-
|
439
|
+
if options[:original_input] && !input.describes_file?(options[:base_uri])
|
440
|
+
Reader.new(options[:original_input], options.merge(
|
441
|
+
metadata: input.tables.first,
|
442
|
+
base: input.tables.first.url,
|
443
|
+
minimal: minimal?,
|
444
|
+
no_found_metadata: true,
|
445
|
+
warnings: @warnings,
|
446
|
+
errors: @errors,
|
447
|
+
)) do |r|
|
448
|
+
case t = r.to_hash(options)
|
449
|
+
when Array then tables += t unless input.tables.first.suppressOutput
|
450
|
+
when Hash then tables << t unless input.tables.first.suppressOutput
|
451
|
+
end
|
378
452
|
end
|
379
|
-
|
380
|
-
|
381
|
-
|
382
|
-
|
383
|
-
|
384
|
-
|
385
|
-
|
386
|
-
|
453
|
+
else
|
454
|
+
input.each_table do |table|
|
455
|
+
next if table.suppressOutput && !validate?
|
456
|
+
Reader.open(table.url, options.merge(
|
457
|
+
metadata: table,
|
458
|
+
base: table.url,
|
459
|
+
minimal: minimal?,
|
460
|
+
no_found_metadata: true,
|
461
|
+
warnings: @warnings,
|
462
|
+
errors: @errors,
|
387
463
|
)) do |r|
|
388
|
-
case
|
389
|
-
when Array then tables += table
|
390
|
-
when Hash then tables << table
|
391
|
-
end
|
392
|
-
end
|
393
|
-
else
|
394
|
-
input.each_table do |table|
|
395
|
-
next if table.suppressOutput
|
396
|
-
Reader.open(table.url, options.merge(
|
397
|
-
format: :tabular,
|
398
|
-
metadata: table,
|
399
|
-
base: table.url,
|
400
|
-
minimal: minimal?,
|
401
|
-
no_found_metadata: true
|
402
|
-
)) do |r|
|
403
|
-
case table = r.to_hash(options)
|
404
|
-
when Array then tables += table
|
405
|
-
when Hash then tables << table
|
406
|
-
end
|
464
|
+
case t = r.to_hash(options)
|
465
|
+
when Array then tables += t unless table.suppressOutput
|
466
|
+
when Hash then tables << t unless table.suppressOutput
|
407
467
|
end
|
408
468
|
end
|
409
469
|
end
|
470
|
+
end
|
410
471
|
|
411
|
-
|
412
|
-
|
413
|
-
|
414
|
-
|
415
|
-
|
416
|
-
|
417
|
-
|
472
|
+
# Lastly, if validating, validate foreign key integrity
|
473
|
+
validate_foreign_keys(input) if validate?
|
474
|
+
|
475
|
+
# Result is table_group or array
|
476
|
+
minimal? ? tables : table_group
|
477
|
+
ensure
|
478
|
+
warnings = @warnings.concat(input.warnings)
|
479
|
+
if validate? && !warnings.empty? && !@options[:warnings]
|
480
|
+
$stderr.puts "Warnings: #{warnings.join("\n")}"
|
418
481
|
end
|
419
|
-
|
420
|
-
|
421
|
-
Reader.open(input.url, options.merge(
|
422
|
-
format: :tabular,
|
423
|
-
metadata: input,
|
424
|
-
base: input.url,
|
425
|
-
minimal: minimal?,
|
426
|
-
no_found_metadata: true
|
427
|
-
)) do |r|
|
428
|
-
table = r.to_hash(options)
|
482
|
+
if validate? && !errors.empty? && !@options[:errors]
|
483
|
+
$stderr.puts "Errors: #{errors.join("\n")}"
|
429
484
|
end
|
430
|
-
|
431
|
-
table
|
432
|
-
else
|
433
|
-
raise "Opened inappropriate metadata type: #{input.type}"
|
434
485
|
end
|
435
486
|
end
|
436
487
|
else
|
@@ -443,6 +494,7 @@ module RDF::Tabular
|
|
443
494
|
|
444
495
|
# Input is file containing CSV data.
|
445
496
|
# Output ROW-Level statements
|
497
|
+
primary_keys = []
|
446
498
|
metadata.each_row(input) do |row|
|
447
499
|
if row.is_a?(RDF::Statement)
|
448
500
|
# May add additional comments
|
@@ -450,14 +502,28 @@ module RDF::Tabular
|
|
450
502
|
table['rdfs:comment'] << row.object.to_s
|
451
503
|
next
|
452
504
|
end
|
505
|
+
|
506
|
+
# Collect primary and foreign keys if validating
|
507
|
+
if validate?
|
508
|
+
primary_keys << row.primaryKey
|
509
|
+
collect_foreign_key_references(metadata, options[:fks_referencing_table], row)
|
510
|
+
end
|
511
|
+
|
453
512
|
# Output row-level metadata
|
454
513
|
r, a, values = {}, {}, {}
|
455
514
|
r["url"] = row.id.to_s
|
456
515
|
r["rownum"] = row.number
|
457
516
|
|
517
|
+
# Row titles
|
518
|
+
Array(row.titles).each { |t| merge_compacted_value(r, "titles", t.to_s) unless t.nil?}
|
519
|
+
|
458
520
|
row.values.each_with_index do |cell, index|
|
459
521
|
column = metadata.tableSchema.columns[index]
|
460
522
|
|
523
|
+
# Collect cell errors
|
524
|
+
(validate? ? errors : warnings) << "Table #{metadata.url} row #{row.number}(src #{row.sourceNumber}, col #{cell.column.sourceNumber}): " +
|
525
|
+
cell.errors.join("\n") unless Array(cell.errors).empty?
|
526
|
+
|
461
527
|
# Ignore suppressed columns
|
462
528
|
next if column.suppressOutput
|
463
529
|
|
@@ -472,7 +538,7 @@ module RDF::Tabular
|
|
472
538
|
co['@id'] = subject.to_s unless subject == 'null'
|
473
539
|
prop = case cell.propertyUrl
|
474
540
|
when RDF.type then '@type'
|
475
|
-
when nil then column.name
|
541
|
+
when nil then URI.decode(column.name) # Use URI-decoded name
|
476
542
|
else
|
477
543
|
# Compact the property to a term or prefixed name
|
478
544
|
metadata.context.compact_iri(cell.propertyUrl, vocab: true)
|
@@ -487,8 +553,12 @@ module RDF::Tabular
|
|
487
553
|
values[cell.valueUrl.to_s][:count] += 1
|
488
554
|
end
|
489
555
|
cell.valueUrl.to_s
|
556
|
+
when cell.value.is_a?(RDF::Literal::Double)
|
557
|
+
cell.value.object.nan? || cell.value.object.infinite? ? cell.value : cell.value.object
|
558
|
+
when cell.value.is_a?(RDF::Literal::Integer)
|
559
|
+
cell.value.object.to_i
|
490
560
|
when cell.value.is_a?(RDF::Literal::Numeric)
|
491
|
-
cell.value.object
|
561
|
+
cell.value.object.to_f
|
492
562
|
when cell.value.is_a?(RDF::Literal::Boolean)
|
493
563
|
cell.value.object
|
494
564
|
when cell.value
|
@@ -520,6 +590,9 @@ module RDF::Tabular
|
|
520
590
|
end
|
521
591
|
end
|
522
592
|
|
593
|
+
# Validate primary keys
|
594
|
+
validate_primary_keys(metadata, primary_keys) if validate?
|
595
|
+
|
523
596
|
# Use string values notes and common properties
|
524
597
|
metadata.each do |key, value|
|
525
598
|
next unless key.to_s.include?(':') || key == :notes
|
@@ -545,7 +618,7 @@ module RDF::Tabular
|
|
545
618
|
table_group = input.to_atd
|
546
619
|
if input.tables.empty? && options[:original_input]
|
547
620
|
Reader.new(options[:original_input], options.merge(
|
548
|
-
base:
|
621
|
+
base: options[:base],
|
549
622
|
no_found_metadata: true
|
550
623
|
)) do |r|
|
551
624
|
table_group["tables"] << r.to_atd(options)
|
@@ -621,6 +694,65 @@ module RDF::Tabular
|
|
621
694
|
@callback.call(statement)
|
622
695
|
end
|
623
696
|
|
697
|
+
# Validate primary keys
#
# Appends a message to `errors` for every row whose primary key repeats the
# primary key of an earlier row. Rows with an empty primary key are ignored.
#
# @param [Table] metadata table being checked; only its `url` is read, for the message
# @param [Array<Array>] primary_keys one entry per row, each the list of that row's
#   primary key cells (objects responding to `value`)
def validate_primary_keys(metadata, primary_keys)
  seen = {}
  primary_keys.reject(&:empty?).each do |row_pks|
    # Key on the list of component values, not on their comma-joined form, so
    # that ["a,b", "c"] and ["a", "b,c"] are not mistaken for the same key.
    key = row_pks.map { |cell| cell.value.to_s }
    errors << "Table #{metadata.url} has duplicate primary key #{key.join(",")}" if seen.key?(key)
    seen[key] = true
  end
end
|
707
|
+
|
708
|
+
# Collect foreign key references
#
# Records `row` on each foreign key definition it takes part in, so that
# referential integrity can be checked once all tables have been read:
# under `:reference_from` for foreign keys declared by this table's schema,
# and under `:reference_to` for foreign keys that reference this table.
# A second row with the same referenced values is reported in `errors`.
#
# @param [Table] metadata
# @param [Array<Hash>] foreign_keys referencing this table
# @param [Row] row
def collect_foreign_key_references(metadata, foreign_keys, row)
  schema = metadata.tableSchema

  # Add row as foreignKey source
  Array(schema ? schema.foreignKeys : []).each do |fk|
    cell_values = foreign_key_cell_values(row, fk['columnReference'])
    next if cell_values.empty? # Don't record if empty
    (fk[:reference_from] ||= {})[cell_values] ||= row
  end

  # Add row as foreignKey dest
  Array(foreign_keys).each do |fk|
    cell_values = foreign_key_cell_values(row, fk['reference']['columnReference'])
    # The target map is created even when this row contributes nothing to it
    fk[:reference_to] ||= {}
    next if cell_values.empty? # Don't record if empty
    errors << "Table #{metadata.url} row #{row.number}(src #{row.sourceNumber}): found duplicate foreign key target: #{cell_values.map(&:to_s).inspect}" if fk[:reference_to][cell_values]
    fk[:reference_to][cell_values] ||= row
  end
end

# String values of the cells of `row` found in the named columns, in reference
# order; columns with no matching cell and cells with an empty string value are skipped.
#
# @param [Row] row
# @param [String, Array<String>] column_reference column name(s)
# @return [Array]
def foreign_key_cell_values(row, column_reference)
  Array(column_reference).map do |name|
    cell = row.values.detect { |candidate| candidate.column.name == name }
    value = cell && cell.stringValue
    value unless value.to_s.empty?
  end.compact
end
|
739
|
+
|
740
|
+
# Validate foreign keys
#
# For every foreign key declared in the schema of each table of `metadata`,
# checks that each value combination recorded under `:reference_from` is also
# present under `:reference_to`; every combination that is missing is appended
# to `errors`. Tables without a schema, or whose schema has no foreign keys,
# are skipped.
#
# @param [#tables] metadata metadata whose tables are checked
def validate_foreign_keys(metadata)
  metadata.tables.each do |table|
    schema = table.tableSchema
    next if schema.nil?

    (schema.foreignKeys || []).each do |fk|
      # Looked up once per foreign key; it does not change while checking
      targets = fk.fetch(:reference_to, {})

      # Verify that reference_from entry exists in reference_to
      fk.fetch(:reference_from, {}).each do |cell_values, row|
        next if targets.has_key?(cell_values)
        errors << "Table #{table.url} row #{row.number}(src #{row.sourceNumber}): " +
                  "Foreign Key violation, expected to find #{cell_values.map(&:to_s).inspect}"
      end
    end
  end
end
|
755
|
+
|
624
756
|
# Merge values into compacted results, creating arrays if necessary
|
625
757
|
def merge_compacted_value(hash, key, value)
|
626
758
|
return unless hash
|