rdf-tabular 0.1.3.1 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +4 -3
- data/VERSION +1 -1
- data/etc/README +4 -0
- data/etc/csvw.jsonld +1 -1
- data/etc/earl.html +10544 -0
- data/etc/earl.jsonld +17068 -0
- data/etc/earl.ttl +7114 -0
- data/etc/template.haml +205 -0
- data/etc/well-known +4 -0
- data/lib/rdf/tabular.rb +10 -4
- data/lib/rdf/tabular/format.rb +11 -7
- data/lib/rdf/tabular/metadata.rb +761 -314
- data/lib/rdf/tabular/reader.rb +276 -144
- data/spec/format_spec.rb +11 -8
- data/spec/matchers.rb +4 -4
- data/spec/metadata_spec.rb +120 -36
- data/spec/reader_spec.rb +56 -18
- data/spec/spec_helper.rb +10 -2
- data/spec/suite_helper.rb +35 -18
- data/spec/suite_spec.rb +26 -24
- metadata +15 -3
data/lib/rdf/tabular/reader.rb
CHANGED
@@ -19,6 +19,16 @@ module RDF::Tabular
|
|
19
19
|
# @return [:read]
|
20
20
|
attr_reader :input
|
21
21
|
|
22
|
+
##
|
23
|
+
# Warnings found during processing
|
24
|
+
# @return [Array<String>]
|
25
|
+
attr_reader :warnings
|
26
|
+
|
27
|
+
##
|
28
|
+
# Accumulated errors found during processing
|
29
|
+
# @return [Array<String>]
|
30
|
+
attr_reader :errors
|
31
|
+
|
22
32
|
##
|
23
33
|
# Initializes the RDF::Tabular Reader instance.
|
24
34
|
#
|
@@ -32,6 +42,8 @@ module RDF::Tabular
|
|
32
42
|
# @option options [Boolean] :noProv do not output optional provenance information
|
33
43
|
# @option options [Array] :warnings
|
34
44
|
# array for placing warnings found when processing metadata. If not set, and validating, warnings are output to `$stderr`
|
45
|
+
# @option options [Array<Hash>] :fks_referencing_table
|
46
|
+
# When called with Table metadata, a list of the foreign keys referencing this table
|
35
47
|
# @yield [reader] `self`
|
36
48
|
# @yieldparam [RDF::Reader] reader
|
37
49
|
# @yieldreturn [void] ignored
|
@@ -48,13 +60,14 @@ module RDF::Tabular
|
|
48
60
|
end
|
49
61
|
|
50
62
|
@options[:depth] ||= 0
|
63
|
+
@errors = @options.fetch(:errors, [])
|
64
|
+
@warnings = @options.fetch(:warnings, [])
|
51
65
|
|
52
66
|
debug("Reader#initialize") {"input: #{input.inspect}, base: #{@options[:base]}"}
|
53
67
|
|
54
68
|
# Minimal implies noProv
|
55
69
|
@options[:noProv] ||= @options[:minimal]
|
56
70
|
|
57
|
-
#byebug if input.is_a?(Array)
|
58
71
|
@input = case input
|
59
72
|
when String then StringIO.new(input)
|
60
73
|
when Array then StringIO.new(input.map {|r| r.join(",")}.join("\n"))
|
@@ -63,8 +76,8 @@ module RDF::Tabular
|
|
63
76
|
|
64
77
|
depth do
|
65
78
|
# If input is JSON, then the input is the metadata
|
66
|
-
|
67
|
-
|
79
|
+
content_type = @input.respond_to?(:content_type) ? @input.content_type : ""
|
80
|
+
if @options[:base] =~ /\.json(?:ld)?$/ || content_type =~ %r(application/(csvm\+|ld\+)?json)
|
68
81
|
@metadata = Metadata.new(@input, @options.merge(filenames: @options[:base]))
|
69
82
|
# If @metadata is for a Table, turn it into a TableGroup
|
70
83
|
@metadata = @metadata.to_table_group if @metadata.is_a?(Table)
|
@@ -79,7 +92,8 @@ module RDF::Tabular
|
|
79
92
|
dialect.header = false if (input.headers.fetch(:content_type, '').split(';').include?('header=absent') rescue false)
|
80
93
|
dialect.encoding = input.charset if (input.charset rescue nil)
|
81
94
|
dialect.separator = "\t" if (input.content_type == "text/tsv" rescue nil)
|
82
|
-
embed_options =
|
95
|
+
embed_options = @options.dup
|
96
|
+
embed_options[:lang] = dialect_metadata.lang if dialect_metadata.lang
|
83
97
|
embedded_metadata = dialect.embedded_metadata(input, @options[:metadata], embed_options)
|
84
98
|
|
85
99
|
if (@metadata = @options[:metadata]) && @metadata.tableSchema
|
@@ -96,7 +110,7 @@ module RDF::Tabular
|
|
96
110
|
@metadata.dialect = dialect
|
97
111
|
else
|
98
112
|
# It's tabular data. Find metadata and proceed as if it was specified in the first place
|
99
|
-
@options[:original_input] = @input
|
113
|
+
@options[:original_input] = @input unless @options[:metadata]
|
100
114
|
@input = @metadata = Metadata.for_input(@input, @options).normalize!
|
101
115
|
end
|
102
116
|
|
@@ -126,93 +140,102 @@ module RDF::Tabular
|
|
126
140
|
debug("each_statement: metadata") {input.inspect}
|
127
141
|
|
128
142
|
depth do
|
129
|
-
|
130
|
-
|
131
|
-
|
132
|
-
|
133
|
-
|
134
|
-
|
135
|
-
|
136
|
-
|
137
|
-
|
138
|
-
|
139
|
-
|
140
|
-
|
141
|
-
|
142
|
-
|
143
|
-
|
144
|
-
add_statement(0, statement)
|
145
|
-
end
|
146
|
-
end unless minimal?
|
143
|
+
begin
|
144
|
+
# Validate metadata
|
145
|
+
input.validate!
|
146
|
+
|
147
|
+
# Use resolved @id of TableGroup, if available
|
148
|
+
table_group = input.id || RDF::Node.new
|
149
|
+
add_statement(0, table_group, RDF.type, CSVW.TableGroup) unless minimal?
|
150
|
+
|
151
|
+
# Common Properties
|
152
|
+
input.each do |key, value|
|
153
|
+
next unless key.to_s.include?(':') || key == :notes
|
154
|
+
input.common_properties(table_group, key, value) do |statement|
|
155
|
+
add_statement(0, statement)
|
156
|
+
end
|
157
|
+
end unless minimal?
|
147
158
|
|
148
|
-
|
149
|
-
|
150
|
-
|
159
|
+
# If we were originally given tabular data as input, simply use that, rather than opening the table URL. This allows buffered data to be used as input.
|
160
|
+
# This case also handles found metadata that doesn't describe the input file
|
161
|
+
if options[:original_input] && !input.describes_file?(options[:base_uri])
|
162
|
+
table_resource = RDF::Node.new
|
163
|
+
add_statement(0, table_group, CSVW.table, table_resource) unless minimal?
|
164
|
+
Reader.new(options[:original_input], options.merge(
|
165
|
+
metadata: input.tables.first,
|
166
|
+
base: input.tables.first.url,
|
167
|
+
no_found_metadata: true,
|
168
|
+
table_resource: table_resource,
|
169
|
+
warnings: @warnings,
|
170
|
+
errors: @errors,
|
171
|
+
)) do |r|
|
172
|
+
r.each_statement(&block)
|
173
|
+
end
|
174
|
+
else
|
175
|
+
input.each_table do |table|
|
176
|
+
# If validating, continue on to process value restrictions
|
177
|
+
next if table.suppressOutput && !validate?
|
178
|
+
|
179
|
+
# Foreign Keys referencing this table
|
180
|
+
fks = input.tables.map do |t|
|
181
|
+
t.tableSchema && t.tableSchema.foreign_keys_referencing(table)
|
182
|
+
end.flatten.compact
|
183
|
+
table_resource = table.id || RDF::Node.new
|
151
184
|
add_statement(0, table_group, CSVW.table, table_resource) unless minimal?
|
152
|
-
Reader.
|
185
|
+
Reader.open(table.url, options.merge(
|
186
|
+
metadata: table,
|
187
|
+
base: table.url,
|
153
188
|
no_found_metadata: true,
|
154
|
-
table_resource: table_resource
|
189
|
+
table_resource: table_resource,
|
190
|
+
fks_referencing_table: fks,
|
191
|
+
warnings: @warnings,
|
192
|
+
errors: @errors,
|
155
193
|
)) do |r|
|
156
194
|
r.each_statement(&block)
|
157
195
|
end
|
158
|
-
else
|
159
|
-
input.each_table do |table|
|
160
|
-
next if table.suppressOutput
|
161
|
-
table_resource = table.id || RDF::Node.new
|
162
|
-
add_statement(0, table_group, CSVW.table, table_resource) unless minimal?
|
163
|
-
Reader.open(table.url, options.merge(
|
164
|
-
format: :tabular,
|
165
|
-
metadata: table,
|
166
|
-
base: table.url,
|
167
|
-
no_found_metadata: true,
|
168
|
-
table_resource: table_resource
|
169
|
-
)) do |r|
|
170
|
-
r.each_statement(&block)
|
171
|
-
end
|
172
|
-
end
|
173
196
|
end
|
174
197
|
|
175
|
-
#
|
176
|
-
if
|
177
|
-
|
178
|
-
add_statement(0, table_group, RDF::PROV.wasGeneratedBy, activity)
|
179
|
-
add_statement(0, activity, RDF.type, RDF::PROV.Activity)
|
180
|
-
add_statement(0, activity, RDF::PROV.wasAssociatedWith, RDF::URI("http://rubygems.org/gems/rdf-tabular"))
|
181
|
-
add_statement(0, activity, RDF::PROV.startedAtTime, RDF::Literal::DateTime.new(start_time))
|
182
|
-
add_statement(0, activity, RDF::PROV.endedAtTime, RDF::Literal::DateTime.new(Time.now))
|
183
|
-
|
184
|
-
unless (urls = input.tables.map(&:url)).empty?
|
185
|
-
usage = RDF::Node.new
|
186
|
-
add_statement(0, activity, RDF::PROV.qualifiedUsage, usage)
|
187
|
-
add_statement(0, usage, RDF.type, RDF::PROV.Usage)
|
188
|
-
urls.each do |url|
|
189
|
-
add_statement(0, usage, RDF::PROV.entity, RDF::URI(url))
|
190
|
-
end
|
191
|
-
add_statement(0, usage, RDF::PROV.hadRole, CSVW.csvEncodedTabularData)
|
192
|
-
end
|
198
|
+
# Lastly, if validating, validate foreign key integrity
|
199
|
+
validate_foreign_keys(input) if validate?
|
200
|
+
end
|
193
201
|
|
194
|
-
|
195
|
-
|
196
|
-
|
197
|
-
|
198
|
-
|
199
|
-
|
200
|
-
|
201
|
-
|
202
|
+
# Provenance
|
203
|
+
if prov?
|
204
|
+
activity = RDF::Node.new
|
205
|
+
add_statement(0, table_group, RDF::PROV.wasGeneratedBy, activity)
|
206
|
+
add_statement(0, activity, RDF.type, RDF::PROV.Activity)
|
207
|
+
add_statement(0, activity, RDF::PROV.wasAssociatedWith, RDF::URI("http://rubygems.org/gems/rdf-tabular"))
|
208
|
+
add_statement(0, activity, RDF::PROV.startedAtTime, RDF::Literal::DateTime.new(start_time))
|
209
|
+
add_statement(0, activity, RDF::PROV.endedAtTime, RDF::Literal::DateTime.new(Time.now))
|
210
|
+
|
211
|
+
unless (urls = input.tables.map(&:url)).empty?
|
212
|
+
usage = RDF::Node.new
|
213
|
+
add_statement(0, activity, RDF::PROV.qualifiedUsage, usage)
|
214
|
+
add_statement(0, usage, RDF.type, RDF::PROV.Usage)
|
215
|
+
urls.each do |url|
|
216
|
+
add_statement(0, usage, RDF::PROV.entity, RDF::URI(url))
|
202
217
|
end
|
218
|
+
add_statement(0, usage, RDF::PROV.hadRole, CSVW.csvEncodedTabularData)
|
203
219
|
end
|
204
|
-
|
205
|
-
|
206
|
-
|
207
|
-
|
220
|
+
|
221
|
+
unless Array(input.filenames).empty?
|
222
|
+
usage = RDF::Node.new
|
223
|
+
add_statement(0, activity, RDF::PROV.qualifiedUsage, usage)
|
224
|
+
add_statement(0, usage, RDF.type, RDF::PROV.Usage)
|
225
|
+
Array(input.filenames).each do |fn|
|
226
|
+
add_statement(0, usage, RDF::PROV.entity, RDF::URI(fn))
|
227
|
+
end
|
228
|
+
add_statement(0, usage, RDF::PROV.hadRole, CSVW.tabularMetadata)
|
208
229
|
end
|
209
230
|
end
|
210
|
-
|
211
|
-
|
212
|
-
|
231
|
+
ensure
|
232
|
+
warnings = @warnings.concat(input.warnings)
|
233
|
+
if validate? && !warnings.empty? && !@options[:warnings]
|
234
|
+
$stderr.puts "Warnings: #{warnings.join("\n")}"
|
235
|
+
end
|
236
|
+
if validate? && !errors.empty? && !@options[:errors]
|
237
|
+
$stderr.puts "Errors: #{errors.join("\n")}"
|
213
238
|
end
|
214
|
-
else
|
215
|
-
raise "Opened inappropriate metadata type: #{input.type}"
|
216
239
|
end
|
217
240
|
end
|
218
241
|
return
|
@@ -220,7 +243,7 @@ module RDF::Tabular
|
|
220
243
|
|
221
244
|
# Output Table-Level RDF triples
|
222
245
|
table_resource = options.fetch(:table_resource, (metadata.id || RDF::Node.new))
|
223
|
-
unless minimal?
|
246
|
+
unless minimal? || metadata.suppressOutput
|
224
247
|
add_statement(0, table_resource, RDF.type, CSVW.Table)
|
225
248
|
add_statement(0, table_resource, CSVW.url, RDF::URI(metadata.url))
|
226
249
|
end
|
@@ -228,15 +251,24 @@ module RDF::Tabular
|
|
228
251
|
# Input is file containing CSV data.
|
229
252
|
# Output ROW-Level statements
|
230
253
|
last_row_num = 0
|
254
|
+
primary_keys = []
|
231
255
|
metadata.each_row(input) do |row|
|
232
256
|
if row.is_a?(RDF::Statement)
|
233
257
|
# May add additional comments
|
234
258
|
row.subject = table_resource
|
235
|
-
add_statement(last_row_num + 1, row)
|
259
|
+
add_statement(last_row_num + 1, row) unless metadata.suppressOutput
|
236
260
|
next
|
237
261
|
end
|
238
262
|
last_row_num = row.sourceNumber
|
239
263
|
|
264
|
+
# Collect primary and foreign keys if validating
|
265
|
+
if validate?
|
266
|
+
primary_keys << row.primaryKey
|
267
|
+
collect_foreign_key_references(metadata, options[:fks_referencing_table], row)
|
268
|
+
end
|
269
|
+
|
270
|
+
next if metadata.suppressOutput
|
271
|
+
|
240
272
|
# Output row-level metadata
|
241
273
|
row_resource = RDF::Node.new
|
242
274
|
default_cell_subject = RDF::Node.new
|
@@ -245,8 +277,14 @@ module RDF::Tabular
|
|
245
277
|
add_statement(row.sourceNumber, row_resource, CSVW.rownum, row.number)
|
246
278
|
add_statement(row.sourceNumber, row_resource, RDF.type, CSVW.Row)
|
247
279
|
add_statement(row.sourceNumber, row_resource, CSVW.url, row.id)
|
280
|
+
row.titles.each do |t|
|
281
|
+
add_statement(row.sourceNumber, row_resource, CSVW.title, t)
|
282
|
+
end
|
248
283
|
end
|
249
284
|
row.values.each_with_index do |cell, index|
|
285
|
+
# Collect cell errors
|
286
|
+
(validate? ? errors : warnings) << "Table #{metadata.url} row #{row.number}(src #{row.sourceNumber}, col #{cell.column.sourceNumber}): " +
|
287
|
+
cell.errors.join("\n") unless Array(cell.errors).empty?
|
250
288
|
next if cell.column.suppressOutput # Skip ignored cells
|
251
289
|
cell_subject = cell.aboutUrl || default_cell_subject
|
252
290
|
propertyUrl = cell.propertyUrl || RDF::URI("#{metadata.url}##{cell.column.name}")
|
@@ -269,6 +307,9 @@ module RDF::Tabular
|
|
269
307
|
end
|
270
308
|
end
|
271
309
|
|
310
|
+
# Validate primary keys
|
311
|
+
validate_primary_keys(metadata, primary_keys) if validate?
|
312
|
+
|
272
313
|
# Common Properties
|
273
314
|
metadata.each do |key, value|
|
274
315
|
next unless key.to_s.include?(':') || key == :notes
|
@@ -278,6 +319,8 @@ module RDF::Tabular
|
|
278
319
|
end unless minimal?
|
279
320
|
end
|
280
321
|
enum_for(:each_statement)
|
322
|
+
rescue IOError => e
|
323
|
+
raise RDF::ReaderError, e.message, e.backtrace
|
281
324
|
end
|
282
325
|
|
283
326
|
##
|
@@ -292,6 +335,19 @@ module RDF::Tabular
|
|
292
335
|
enum_for(:each_triple)
|
293
336
|
end
|
294
337
|
|
338
|
+
##
|
339
|
+
# Validate and raise an exception if any errors are found while processing either metadata or tables
|
340
|
+
# @return [self]
|
341
|
+
# @raise [Error]
|
342
|
+
def validate!
|
343
|
+
each_statement {} # Read all rows
|
344
|
+
raise Error, errors.join("\n") unless errors.empty?
|
345
|
+
self
|
346
|
+
rescue RDF::ReaderError => e
|
347
|
+
raise Error, e.message
|
348
|
+
self
|
349
|
+
end
|
350
|
+
|
295
351
|
##
|
296
352
|
# Transform to JSON. Note that this must be run from within the reader context if the input is an open IO stream.
|
297
353
|
#
|
@@ -317,6 +373,7 @@ module RDF::Tabular
|
|
317
373
|
# @option options [::JSON::State] :state used when dumping
|
318
374
|
# @option options [Boolean] :atd output Abstract Table representation instead
|
319
375
|
# @return [String]
|
376
|
+
# @raise [RDF::Tabular::Error]
|
320
377
|
def to_json(options = @options)
|
321
378
|
io = case options
|
322
379
|
when IO, StringIO then options
|
@@ -345,6 +402,8 @@ module RDF::Tabular
|
|
345
402
|
hash = self.send(hash_fn, options)
|
346
403
|
::JSON.generate(hash, json_state)
|
347
404
|
end
|
405
|
+
rescue IOError => e
|
406
|
+
raise RDF::Tabular::Error, e.message
|
348
407
|
end
|
349
408
|
|
350
409
|
##
|
@@ -360,77 +419,69 @@ module RDF::Tabular
|
|
360
419
|
debug("each_statement: metadata") {input.inspect}
|
361
420
|
depth do
|
362
421
|
# Get Metadata to invoke and open referenced files
|
363
|
-
|
364
|
-
|
365
|
-
|
366
|
-
|
367
|
-
|
422
|
+
begin
|
423
|
+
# Validate metadata
|
424
|
+
input.validate!
|
425
|
+
|
426
|
+
tables = []
|
427
|
+
table_group = {}
|
428
|
+
table_group['@id'] = input.id.to_s if input.id
|
429
|
+
|
430
|
+
# Common Properties
|
431
|
+
input.each do |key, value|
|
432
|
+
next unless key.to_s.include?(':') || key == :notes
|
433
|
+
table_group[key] = input.common_properties(nil, key, value)
|
434
|
+
table_group[key] = [table_group[key]] if key == :notes && !table_group[key].is_a?(Array)
|
435
|
+
end
|
368
436
|
|
369
|
-
|
370
|
-
table_group = {}
|
371
|
-
table_group['@id'] = input.id.to_s if input.id
|
437
|
+
table_group['tables'] = tables
|
372
438
|
|
373
|
-
|
374
|
-
|
375
|
-
|
376
|
-
|
377
|
-
|
439
|
+
if options[:original_input] && !input.describes_file?(options[:base_uri])
|
440
|
+
Reader.new(options[:original_input], options.merge(
|
441
|
+
metadata: input.tables.first,
|
442
|
+
base: input.tables.first.url,
|
443
|
+
minimal: minimal?,
|
444
|
+
no_found_metadata: true,
|
445
|
+
warnings: @warnings,
|
446
|
+
errors: @errors,
|
447
|
+
)) do |r|
|
448
|
+
case t = r.to_hash(options)
|
449
|
+
when Array then tables += t unless input.tables.first.suppressOutput
|
450
|
+
when Hash then tables << t unless input.tables.first.suppressOutput
|
451
|
+
end
|
378
452
|
end
|
379
|
-
|
380
|
-
|
381
|
-
|
382
|
-
|
383
|
-
|
384
|
-
|
385
|
-
|
386
|
-
|
453
|
+
else
|
454
|
+
input.each_table do |table|
|
455
|
+
next if table.suppressOutput && !validate?
|
456
|
+
Reader.open(table.url, options.merge(
|
457
|
+
metadata: table,
|
458
|
+
base: table.url,
|
459
|
+
minimal: minimal?,
|
460
|
+
no_found_metadata: true,
|
461
|
+
warnings: @warnings,
|
462
|
+
errors: @errors,
|
387
463
|
)) do |r|
|
388
|
-
case
|
389
|
-
when Array then tables += table
|
390
|
-
when Hash then tables << table
|
391
|
-
end
|
392
|
-
end
|
393
|
-
else
|
394
|
-
input.each_table do |table|
|
395
|
-
next if table.suppressOutput
|
396
|
-
Reader.open(table.url, options.merge(
|
397
|
-
format: :tabular,
|
398
|
-
metadata: table,
|
399
|
-
base: table.url,
|
400
|
-
minimal: minimal?,
|
401
|
-
no_found_metadata: true
|
402
|
-
)) do |r|
|
403
|
-
case table = r.to_hash(options)
|
404
|
-
when Array then tables += table
|
405
|
-
when Hash then tables << table
|
406
|
-
end
|
464
|
+
case t = r.to_hash(options)
|
465
|
+
when Array then tables += t unless table.suppressOutput
|
466
|
+
when Hash then tables << t unless table.suppressOutput
|
407
467
|
end
|
408
468
|
end
|
409
469
|
end
|
470
|
+
end
|
410
471
|
|
411
|
-
|
412
|
-
|
413
|
-
|
414
|
-
|
415
|
-
|
416
|
-
|
417
|
-
|
472
|
+
# Lastly, if validating, validate foreign key integrity
|
473
|
+
validate_foreign_keys(input) if validate?
|
474
|
+
|
475
|
+
# Result is table_group or array
|
476
|
+
minimal? ? tables : table_group
|
477
|
+
ensure
|
478
|
+
warnings = @warnings.concat(input.warnings)
|
479
|
+
if validate? && !warnings.empty? && !@options[:warnings]
|
480
|
+
$stderr.puts "Warnings: #{warnings.join("\n")}"
|
418
481
|
end
|
419
|
-
|
420
|
-
|
421
|
-
Reader.open(input.url, options.merge(
|
422
|
-
format: :tabular,
|
423
|
-
metadata: input,
|
424
|
-
base: input.url,
|
425
|
-
minimal: minimal?,
|
426
|
-
no_found_metadata: true
|
427
|
-
)) do |r|
|
428
|
-
table = r.to_hash(options)
|
482
|
+
if validate? && !errors.empty? && !@options[:errors]
|
483
|
+
$stderr.puts "Errors: #{errors.join("\n")}"
|
429
484
|
end
|
430
|
-
|
431
|
-
table
|
432
|
-
else
|
433
|
-
raise "Opened inappropriate metadata type: #{input.type}"
|
434
485
|
end
|
435
486
|
end
|
436
487
|
else
|
@@ -443,6 +494,7 @@ module RDF::Tabular
|
|
443
494
|
|
444
495
|
# Input is file containing CSV data.
|
445
496
|
# Output ROW-Level statements
|
497
|
+
primary_keys = []
|
446
498
|
metadata.each_row(input) do |row|
|
447
499
|
if row.is_a?(RDF::Statement)
|
448
500
|
# May add additional comments
|
@@ -450,14 +502,28 @@ module RDF::Tabular
|
|
450
502
|
table['rdfs:comment'] << row.object.to_s
|
451
503
|
next
|
452
504
|
end
|
505
|
+
|
506
|
+
# Collect primary and foreign keys if validating
|
507
|
+
if validate?
|
508
|
+
primary_keys << row.primaryKey
|
509
|
+
collect_foreign_key_references(metadata, options[:fks_referencing_table], row)
|
510
|
+
end
|
511
|
+
|
453
512
|
# Output row-level metadata
|
454
513
|
r, a, values = {}, {}, {}
|
455
514
|
r["url"] = row.id.to_s
|
456
515
|
r["rownum"] = row.number
|
457
516
|
|
517
|
+
# Row titles
|
518
|
+
Array(row.titles).each { |t| merge_compacted_value(r, "titles", t.to_s) unless t.nil?}
|
519
|
+
|
458
520
|
row.values.each_with_index do |cell, index|
|
459
521
|
column = metadata.tableSchema.columns[index]
|
460
522
|
|
523
|
+
# Collect cell errors
|
524
|
+
(validate? ? errors : warnings) << "Table #{metadata.url} row #{row.number}(src #{row.sourceNumber}, col #{cell.column.sourceNumber}): " +
|
525
|
+
cell.errors.join("\n") unless Array(cell.errors).empty?
|
526
|
+
|
461
527
|
# Ignore suppressed columns
|
462
528
|
next if column.suppressOutput
|
463
529
|
|
@@ -472,7 +538,7 @@ module RDF::Tabular
|
|
472
538
|
co['@id'] = subject.to_s unless subject == 'null'
|
473
539
|
prop = case cell.propertyUrl
|
474
540
|
when RDF.type then '@type'
|
475
|
-
when nil then column.name
|
541
|
+
when nil then URI.decode(column.name) # Use URI-decoded name
|
476
542
|
else
|
477
543
|
# Compact the property to a term or prefixed name
|
478
544
|
metadata.context.compact_iri(cell.propertyUrl, vocab: true)
|
@@ -487,8 +553,12 @@ module RDF::Tabular
|
|
487
553
|
values[cell.valueUrl.to_s][:count] += 1
|
488
554
|
end
|
489
555
|
cell.valueUrl.to_s
|
556
|
+
when cell.value.is_a?(RDF::Literal::Double)
|
557
|
+
cell.value.object.nan? || cell.value.object.infinite? ? cell.value : cell.value.object
|
558
|
+
when cell.value.is_a?(RDF::Literal::Integer)
|
559
|
+
cell.value.object.to_i
|
490
560
|
when cell.value.is_a?(RDF::Literal::Numeric)
|
491
|
-
cell.value.object
|
561
|
+
cell.value.object.to_f
|
492
562
|
when cell.value.is_a?(RDF::Literal::Boolean)
|
493
563
|
cell.value.object
|
494
564
|
when cell.value
|
@@ -520,6 +590,9 @@ module RDF::Tabular
|
|
520
590
|
end
|
521
591
|
end
|
522
592
|
|
593
|
+
# Validate primary keys
|
594
|
+
validate_primary_keys(metadata, primary_keys) if validate?
|
595
|
+
|
523
596
|
# Use string values notes and common properties
|
524
597
|
metadata.each do |key, value|
|
525
598
|
next unless key.to_s.include?(':') || key == :notes
|
@@ -545,7 +618,7 @@ module RDF::Tabular
|
|
545
618
|
table_group = input.to_atd
|
546
619
|
if input.tables.empty? && options[:original_input]
|
547
620
|
Reader.new(options[:original_input], options.merge(
|
548
|
-
base:
|
621
|
+
base: options[:base],
|
549
622
|
no_found_metadata: true
|
550
623
|
)) do |r|
|
551
624
|
table_group["tables"] << r.to_atd(options)
|
@@ -621,6 +694,65 @@ module RDF::Tabular
|
|
621
694
|
@callback.call(statement)
|
622
695
|
end
|
623
696
|
|
697
|
+
# Validate primary keys
|
698
|
+
def validate_primary_keys(metadata, primary_keys)
|
699
|
+
pk_strings = {}
|
700
|
+
primary_keys.reject(&:empty?).each do |row_pks|
|
701
|
+
pk_names = row_pks.map {|cell| cell.value}.join(",")
|
702
|
+
errors << "Table #{metadata.url} has duplicate primary key #{pk_names}" if pk_strings.has_key?(pk_names)
|
703
|
+
pk_strings[pk_names] ||= 0
|
704
|
+
pk_strings[pk_names] += 1
|
705
|
+
end
|
706
|
+
end
|
707
|
+
|
708
|
+
# Collect foreign key references
|
709
|
+
# @param [Table] metadata
|
710
|
+
# @param [Array<Hash>] foreign_keys referencing this table
|
711
|
+
# @param [Row] row
|
712
|
+
def collect_foreign_key_references(metadata, foreign_keys, row)
|
713
|
+
schema = metadata.tableSchema
|
714
|
+
|
715
|
+
# Add row as foreignKey source
|
716
|
+
Array(schema ? schema.foreignKeys : []).each do |fk|
|
717
|
+
colRef = Array(fk['columnReference'])
|
718
|
+
|
719
|
+
# Referenced cells, in order
|
720
|
+
cells = colRef.map {|n| row.values.detect {|cell| cell.column.name == n}}.compact
|
721
|
+
cell_values = cells.map {|cell| cell.stringValue unless cell.stringValue.to_s.empty?}.compact
|
722
|
+
next if cell_values.empty? # Don't record if empty
|
723
|
+
(fk[:reference_from] ||= {})[cell_values] ||= row
|
724
|
+
end
|
725
|
+
|
726
|
+
# Add row as foreignKey dest
|
727
|
+
Array(foreign_keys).each do |fk|
|
728
|
+
colRef = Array(fk['reference']['columnReference'])
|
729
|
+
|
730
|
+
# Referenced cells, in order
|
731
|
+
cells = colRef.map {|n| row.values.detect {|cell| cell.column.name == n}}.compact
|
732
|
+
fk[:reference_to] ||= {}
|
733
|
+
cell_values = cells.map {|cell| cell.stringValue unless cell.stringValue.to_s.empty?}.compact
|
734
|
+
next if cell_values.empty? # Don't record if empty
|
735
|
+
errors << "Table #{metadata.url} row #{row.number}(src #{row.sourceNumber}): found duplicate foreign key target: #{cell_values.map(&:to_s).inspect}" if fk[:reference_to][cell_values]
|
736
|
+
fk[:reference_to][cell_values] ||= row
|
737
|
+
end
|
738
|
+
end
|
739
|
+
|
740
|
+
# Validate foreign keys
|
741
|
+
def validate_foreign_keys(metadata)
|
742
|
+
metadata.tables.each do |table|
|
743
|
+
next if (schema = table.tableSchema).nil?
|
744
|
+
schema.foreignKeys.each do |fk|
|
745
|
+
# Verify that reference_from entry exists in reference_to
|
746
|
+
fk.fetch(:reference_from, {}).each do |cell_values, row|
|
747
|
+
unless fk.fetch(:reference_to, {}).has_key?(cell_values)
|
748
|
+
errors << "Table #{table.url} row #{row.number}(src #{row.sourceNumber}): " +
|
749
|
+
"Foreign Key violation, expected to find #{cell_values.map(&:to_s).inspect}"
|
750
|
+
end
|
751
|
+
end
|
752
|
+
end if schema.foreignKeys
|
753
|
+
end
|
754
|
+
end
|
755
|
+
|
624
756
|
# Merge values into compacted results, creating arrays if necessary
|
625
757
|
def merge_compacted_value(hash, key, value)
|
626
758
|
return unless hash
|