rdf-tabular 0.1.3.1 → 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -19,6 +19,16 @@ module RDF::Tabular
19
19
  # @return [:read]
20
20
  attr_reader :input
21
21
 
22
+ ##
23
+ # Warnings found during processing
24
+ # @return [Array<String>]
25
+ attr_reader :warnings
26
+
27
+ ##
28
+ # Accumulated errors found during processing
29
+ # @return [Array<String>]
30
+ attr_reader :errors
31
+
22
32
  ##
23
33
  # Initializes the RDF::Tabular Reader instance.
24
34
  #
@@ -32,6 +42,8 @@ module RDF::Tabular
32
42
  # @option options [Boolean] :noProv do not output optional provenance information
33
43
  # @option options [Array] :warnings
34
44
  # array for placing warnings found when processing metadata. If not set, and validating, warnings are output to `$stderr`
45
+ # @option options [Array<Hash>] :fks_referencing_table
46
+ # When called with Table metadata, a list of the foreign keys referencing this table
35
47
  # @yield [reader] `self`
36
48
  # @yieldparam [RDF::Reader] reader
37
49
  # @yieldreturn [void] ignored
@@ -48,13 +60,14 @@ module RDF::Tabular
48
60
  end
49
61
 
50
62
  @options[:depth] ||= 0
63
+ @errors = @options.fetch(:errors, [])
64
+ @warnings = @options.fetch(:warnings, [])
51
65
 
52
66
  debug("Reader#initialize") {"input: #{input.inspect}, base: #{@options[:base]}"}
53
67
 
54
68
  # Minimal implies noProv
55
69
  @options[:noProv] ||= @options[:minimal]
56
70
 
57
- #byebug if input.is_a?(Array)
58
71
  @input = case input
59
72
  when String then StringIO.new(input)
60
73
  when Array then StringIO.new(input.map {|r| r.join(",")}.join("\n"))
@@ -63,8 +76,8 @@ module RDF::Tabular
63
76
 
64
77
  depth do
65
78
  # If input is JSON, then the input is the metadata
66
- if @options[:base] =~ /\.json(?:ld)?$/ ||
67
- @input.respond_to?(:content_type) && @input.content_type =~ %r(application/(?:ld+)json)
79
+ content_type = @input.respond_to?(:content_type) ? @input.content_type : ""
80
+ if @options[:base] =~ /\.json(?:ld)?$/ || content_type =~ %r(application/(csvm\+|ld\+)?json)
68
81
  @metadata = Metadata.new(@input, @options.merge(filenames: @options[:base]))
69
82
  # If @metadata is for a Table, turn it into a TableGroup
70
83
  @metadata = @metadata.to_table_group if @metadata.is_a?(Table)
@@ -79,7 +92,8 @@ module RDF::Tabular
79
92
  dialect.header = false if (input.headers.fetch(:content_type, '').split(';').include?('header=absent') rescue false)
80
93
  dialect.encoding = input.charset if (input.charset rescue nil)
81
94
  dialect.separator = "\t" if (input.content_type == "text/tsv" rescue nil)
82
- embed_options = {base: "http://example.org/default-metadata"}.merge(@options)
95
+ embed_options = @options.dup
96
+ embed_options[:lang] = dialect_metadata.lang if dialect_metadata.lang
83
97
  embedded_metadata = dialect.embedded_metadata(input, @options[:metadata], embed_options)
84
98
 
85
99
  if (@metadata = @options[:metadata]) && @metadata.tableSchema
@@ -96,7 +110,7 @@ module RDF::Tabular
96
110
  @metadata.dialect = dialect
97
111
  else
98
112
  # It's tabular data. Find metadata and proceed as if it was specified in the first place
99
- @options[:original_input] = @input
113
+ @options[:original_input] = @input unless @options[:metadata]
100
114
  @input = @metadata = Metadata.for_input(@input, @options).normalize!
101
115
  end
102
116
 
@@ -126,93 +140,102 @@ module RDF::Tabular
126
140
  debug("each_statement: metadata") {input.inspect}
127
141
 
128
142
  depth do
129
- # Get Metadata to invoke and open referenced files
130
- case input.type
131
- when :TableGroup
132
- begin
133
- # Validate metadata
134
- input.validate!
135
-
136
- # Use resolved @id of TableGroup, if available
137
- table_group = input.id || RDF::Node.new
138
- add_statement(0, table_group, RDF.type, CSVW.TableGroup) unless minimal?
139
-
140
- # Common Properties
141
- input.each do |key, value|
142
- next unless key.to_s.include?(':') || key == :notes
143
- input.common_properties(table_group, key, value) do |statement|
144
- add_statement(0, statement)
145
- end
146
- end unless minimal?
143
+ begin
144
+ # Validate metadata
145
+ input.validate!
146
+
147
+ # Use resolved @id of TableGroup, if available
148
+ table_group = input.id || RDF::Node.new
149
+ add_statement(0, table_group, RDF.type, CSVW.TableGroup) unless minimal?
150
+
151
+ # Common Properties
152
+ input.each do |key, value|
153
+ next unless key.to_s.include?(':') || key == :notes
154
+ input.common_properties(table_group, key, value) do |statement|
155
+ add_statement(0, statement)
156
+ end
157
+ end unless minimal?
147
158
 
148
- # If we were originally given tabular data as input, simply use that, rather than opening the table URL. This allows buffered data to be used as input
149
- if Array(input.tables).empty? && options[:original_input]
150
- table_resource = RDF::Node.new
159
+ # If we were originally given tabular data as input, simply use that, rather than opening the table URL. This allows buffered data to be used as input.
160
+ # This case also handles found metadata that doesn't describe the input file
161
+ if options[:original_input] && !input.describes_file?(options[:base_uri])
162
+ table_resource = RDF::Node.new
163
+ add_statement(0, table_group, CSVW.table, table_resource) unless minimal?
164
+ Reader.new(options[:original_input], options.merge(
165
+ metadata: input.tables.first,
166
+ base: input.tables.first.url,
167
+ no_found_metadata: true,
168
+ table_resource: table_resource,
169
+ warnings: @warnings,
170
+ errors: @errors,
171
+ )) do |r|
172
+ r.each_statement(&block)
173
+ end
174
+ else
175
+ input.each_table do |table|
176
+ # If validating, continue on to process value restrictions
177
+ next if table.suppressOutput && !validate?
178
+
179
+ # Foreign Keys referencing this table
180
+ fks = input.tables.map do |t|
181
+ t.tableSchema && t.tableSchema.foreign_keys_referencing(table)
182
+ end.flatten.compact
183
+ table_resource = table.id || RDF::Node.new
151
184
  add_statement(0, table_group, CSVW.table, table_resource) unless minimal?
152
- Reader.new(options[:original_input], options.merge(
185
+ Reader.open(table.url, options.merge(
186
+ metadata: table,
187
+ base: table.url,
153
188
  no_found_metadata: true,
154
- table_resource: table_resource
189
+ table_resource: table_resource,
190
+ fks_referencing_table: fks,
191
+ warnings: @warnings,
192
+ errors: @errors,
155
193
  )) do |r|
156
194
  r.each_statement(&block)
157
195
  end
158
- else
159
- input.each_table do |table|
160
- next if table.suppressOutput
161
- table_resource = table.id || RDF::Node.new
162
- add_statement(0, table_group, CSVW.table, table_resource) unless minimal?
163
- Reader.open(table.url, options.merge(
164
- format: :tabular,
165
- metadata: table,
166
- base: table.url,
167
- no_found_metadata: true,
168
- table_resource: table_resource
169
- )) do |r|
170
- r.each_statement(&block)
171
- end
172
- end
173
196
  end
174
197
 
175
- # Provenance
176
- if prov?
177
- activity = RDF::Node.new
178
- add_statement(0, table_group, RDF::PROV.wasGeneratedBy, activity)
179
- add_statement(0, activity, RDF.type, RDF::PROV.Activity)
180
- add_statement(0, activity, RDF::PROV.wasAssociatedWith, RDF::URI("http://rubygems.org/gems/rdf-tabular"))
181
- add_statement(0, activity, RDF::PROV.startedAtTime, RDF::Literal::DateTime.new(start_time))
182
- add_statement(0, activity, RDF::PROV.endedAtTime, RDF::Literal::DateTime.new(Time.now))
183
-
184
- unless (urls = input.tables.map(&:url)).empty?
185
- usage = RDF::Node.new
186
- add_statement(0, activity, RDF::PROV.qualifiedUsage, usage)
187
- add_statement(0, usage, RDF.type, RDF::PROV.Usage)
188
- urls.each do |url|
189
- add_statement(0, usage, RDF::PROV.entity, RDF::URI(url))
190
- end
191
- add_statement(0, usage, RDF::PROV.hadRole, CSVW.csvEncodedTabularData)
192
- end
198
+ # Lastly, if validating, validate foreign key integrity
199
+ validate_foreign_keys(input) if validate?
200
+ end
193
201
 
194
- unless Array(input.filenames).empty?
195
- usage = RDF::Node.new
196
- add_statement(0, activity, RDF::PROV.qualifiedUsage, usage)
197
- add_statement(0, usage, RDF.type, RDF::PROV.Usage)
198
- Array(input.filenames).each do |fn|
199
- add_statement(0, usage, RDF::PROV.entity, RDF::URI(fn))
200
- end
201
- add_statement(0, usage, RDF::PROV.hadRole, CSVW.tabularMetadata)
202
+ # Provenance
203
+ if prov?
204
+ activity = RDF::Node.new
205
+ add_statement(0, table_group, RDF::PROV.wasGeneratedBy, activity)
206
+ add_statement(0, activity, RDF.type, RDF::PROV.Activity)
207
+ add_statement(0, activity, RDF::PROV.wasAssociatedWith, RDF::URI("http://rubygems.org/gems/rdf-tabular"))
208
+ add_statement(0, activity, RDF::PROV.startedAtTime, RDF::Literal::DateTime.new(start_time))
209
+ add_statement(0, activity, RDF::PROV.endedAtTime, RDF::Literal::DateTime.new(Time.now))
210
+
211
+ unless (urls = input.tables.map(&:url)).empty?
212
+ usage = RDF::Node.new
213
+ add_statement(0, activity, RDF::PROV.qualifiedUsage, usage)
214
+ add_statement(0, usage, RDF.type, RDF::PROV.Usage)
215
+ urls.each do |url|
216
+ add_statement(0, usage, RDF::PROV.entity, RDF::URI(url))
202
217
  end
218
+ add_statement(0, usage, RDF::PROV.hadRole, CSVW.csvEncodedTabularData)
203
219
  end
204
- ensure
205
- warnings = @options.fetch(:warnings, []).concat(input.warnings)
206
- if validate? && !warnings.empty? && !@options[:warnings]
207
- $stderr.puts "Warnings: #{warnings.join("\n")}"
220
+
221
+ unless Array(input.filenames).empty?
222
+ usage = RDF::Node.new
223
+ add_statement(0, activity, RDF::PROV.qualifiedUsage, usage)
224
+ add_statement(0, usage, RDF.type, RDF::PROV.Usage)
225
+ Array(input.filenames).each do |fn|
226
+ add_statement(0, usage, RDF::PROV.entity, RDF::URI(fn))
227
+ end
228
+ add_statement(0, usage, RDF::PROV.hadRole, CSVW.tabularMetadata)
208
229
  end
209
230
  end
210
- when :Table
211
- Reader.open(input.url, options.merge(format: :tabular, metadata: input, base: input.url, no_found_metadata: true)) do |r|
212
- r.each_statement(&block)
231
+ ensure
232
+ warnings = @warnings.concat(input.warnings)
233
+ if validate? && !warnings.empty? && !@options[:warnings]
234
+ $stderr.puts "Warnings: #{warnings.join("\n")}"
235
+ end
236
+ if validate? && !errors.empty? && !@options[:errors]
237
+ $stderr.puts "Errors: #{errors.join("\n")}"
213
238
  end
214
- else
215
- raise "Opened inappropriate metadata type: #{input.type}"
216
239
  end
217
240
  end
218
241
  return
@@ -220,7 +243,7 @@ module RDF::Tabular
220
243
 
221
244
  # Output Table-Level RDF triples
222
245
  table_resource = options.fetch(:table_resource, (metadata.id || RDF::Node.new))
223
- unless minimal?
246
+ unless minimal? || metadata.suppressOutput
224
247
  add_statement(0, table_resource, RDF.type, CSVW.Table)
225
248
  add_statement(0, table_resource, CSVW.url, RDF::URI(metadata.url))
226
249
  end
@@ -228,15 +251,24 @@ module RDF::Tabular
228
251
  # Input is file containing CSV data.
229
252
  # Output ROW-Level statements
230
253
  last_row_num = 0
254
+ primary_keys = []
231
255
  metadata.each_row(input) do |row|
232
256
  if row.is_a?(RDF::Statement)
233
257
  # May add additional comments
234
258
  row.subject = table_resource
235
- add_statement(last_row_num + 1, row)
259
+ add_statement(last_row_num + 1, row) unless metadata.suppressOutput
236
260
  next
237
261
  end
238
262
  last_row_num = row.sourceNumber
239
263
 
264
+ # Collect primary and foreign keys if validating
265
+ if validate?
266
+ primary_keys << row.primaryKey
267
+ collect_foreign_key_references(metadata, options[:fks_referencing_table], row)
268
+ end
269
+
270
+ next if metadata.suppressOutput
271
+
240
272
  # Output row-level metadata
241
273
  row_resource = RDF::Node.new
242
274
  default_cell_subject = RDF::Node.new
@@ -245,8 +277,14 @@ module RDF::Tabular
245
277
  add_statement(row.sourceNumber, row_resource, CSVW.rownum, row.number)
246
278
  add_statement(row.sourceNumber, row_resource, RDF.type, CSVW.Row)
247
279
  add_statement(row.sourceNumber, row_resource, CSVW.url, row.id)
280
+ row.titles.each do |t|
281
+ add_statement(row.sourceNumber, row_resource, CSVW.title, t)
282
+ end
248
283
  end
249
284
  row.values.each_with_index do |cell, index|
285
+ # Collect cell errors
286
+ (validate? ? errors : warnings) << "Table #{metadata.url} row #{row.number}(src #{row.sourceNumber}, col #{cell.column.sourceNumber}): " +
287
+ cell.errors.join("\n") unless Array(cell.errors).empty?
250
288
  next if cell.column.suppressOutput # Skip ignored cells
251
289
  cell_subject = cell.aboutUrl || default_cell_subject
252
290
  propertyUrl = cell.propertyUrl || RDF::URI("#{metadata.url}##{cell.column.name}")
@@ -269,6 +307,9 @@ module RDF::Tabular
269
307
  end
270
308
  end
271
309
 
310
+ # Validate primary keys
311
+ validate_primary_keys(metadata, primary_keys) if validate?
312
+
272
313
  # Common Properties
273
314
  metadata.each do |key, value|
274
315
  next unless key.to_s.include?(':') || key == :notes
@@ -278,6 +319,8 @@ module RDF::Tabular
278
319
  end unless minimal?
279
320
  end
280
321
  enum_for(:each_statement)
322
+ rescue IOError => e
323
+ raise RDF::ReaderError, e.message, e.backtrace
281
324
  end
282
325
 
283
326
  ##
@@ -292,6 +335,19 @@ module RDF::Tabular
292
335
  enum_for(:each_triple)
293
336
  end
294
337
 
338
+ ##
339
+ # Validate and raise an exception if any errors are found while processing either metadata or tables
340
+ # @return [self]
341
+ # @raise [Error]
342
+ def validate!
343
+ each_statement {} # Read all rows
344
+ raise Error, errors.join("\n") unless errors.empty?
345
+ self
346
+ rescue RDF::ReaderError => e
347
+ raise Error, e.message
348
+ self
349
+ end
350
+
295
351
  ##
296
352
  # Transform to JSON. Note that this must be run from within the reader context if the input is an open IO stream.
297
353
  #
@@ -317,6 +373,7 @@ module RDF::Tabular
317
373
  # @option options [::JSON::State] :state used when dumping
318
374
  # @option options [Boolean] :atd output Abstract Table representation instead
319
375
  # @return [String]
376
+ # @raise [RDF::Tabular::Error]
320
377
  def to_json(options = @options)
321
378
  io = case options
322
379
  when IO, StringIO then options
@@ -345,6 +402,8 @@ module RDF::Tabular
345
402
  hash = self.send(hash_fn, options)
346
403
  ::JSON.generate(hash, json_state)
347
404
  end
405
+ rescue IOError => e
406
+ raise RDF::Tabular::Error, e.message
348
407
  end
349
408
 
350
409
  ##
@@ -360,77 +419,69 @@ module RDF::Tabular
360
419
  debug("each_statement: metadata") {input.inspect}
361
420
  depth do
362
421
  # Get Metadata to invoke and open referenced files
363
- case input.type
364
- when :TableGroup
365
- begin
366
- # Validate metadata
367
- input.validate!
422
+ begin
423
+ # Validate metadata
424
+ input.validate!
425
+
426
+ tables = []
427
+ table_group = {}
428
+ table_group['@id'] = input.id.to_s if input.id
429
+
430
+ # Common Properties
431
+ input.each do |key, value|
432
+ next unless key.to_s.include?(':') || key == :notes
433
+ table_group[key] = input.common_properties(nil, key, value)
434
+ table_group[key] = [table_group[key]] if key == :notes && !table_group[key].is_a?(Array)
435
+ end
368
436
 
369
- tables = []
370
- table_group = {}
371
- table_group['@id'] = input.id.to_s if input.id
437
+ table_group['tables'] = tables
372
438
 
373
- # Common Properties
374
- input.each do |key, value|
375
- next unless key.to_s.include?(':') || key == :notes
376
- table_group[key] = input.common_properties(nil, key, value)
377
- table_group[key] = [table_group[key]] if key == :notes && !table_group[key].is_a?(Array)
439
+ if options[:original_input] && !input.describes_file?(options[:base_uri])
440
+ Reader.new(options[:original_input], options.merge(
441
+ metadata: input.tables.first,
442
+ base: input.tables.first.url,
443
+ minimal: minimal?,
444
+ no_found_metadata: true,
445
+ warnings: @warnings,
446
+ errors: @errors,
447
+ )) do |r|
448
+ case t = r.to_hash(options)
449
+ when Array then tables += t unless input.tables.first.suppressOutput
450
+ when Hash then tables << t unless input.tables.first.suppressOutput
451
+ end
378
452
  end
379
-
380
- table_group['table'] = tables
381
-
382
- if input.tables.empty? && options[:original_input]
383
- Reader.new(options[:original_input], options.merge(
384
- base: options.fetch(:base, "http://example.org/default-metadata"),
385
- minimal: minimal?,
386
- no_found_metadata: true
453
+ else
454
+ input.each_table do |table|
455
+ next if table.suppressOutput && !validate?
456
+ Reader.open(table.url, options.merge(
457
+ metadata: table,
458
+ base: table.url,
459
+ minimal: minimal?,
460
+ no_found_metadata: true,
461
+ warnings: @warnings,
462
+ errors: @errors,
387
463
  )) do |r|
388
- case table = r.to_hash(options)
389
- when Array then tables += table
390
- when Hash then tables << table
391
- end
392
- end
393
- else
394
- input.each_table do |table|
395
- next if table.suppressOutput
396
- Reader.open(table.url, options.merge(
397
- format: :tabular,
398
- metadata: table,
399
- base: table.url,
400
- minimal: minimal?,
401
- no_found_metadata: true
402
- )) do |r|
403
- case table = r.to_hash(options)
404
- when Array then tables += table
405
- when Hash then tables << table
406
- end
464
+ case t = r.to_hash(options)
465
+ when Array then tables += t unless table.suppressOutput
466
+ when Hash then tables << t unless table.suppressOutput
407
467
  end
408
468
  end
409
469
  end
470
+ end
410
471
 
411
- # Result is table_group or array
412
- minimal? ? tables : table_group
413
- ensure
414
- warnings = options.fetch(:warnings, []).concat(input.warnings)
415
- if validate? && !warnings.empty? && !@options[:warnings]
416
- $stderr.puts "Warnings: #{warnings.join("\n")}"
417
- end
472
+ # Lastly, if validating, validate foreign key integrity
473
+ validate_foreign_keys(input) if validate?
474
+
475
+ # Result is table_group or array
476
+ minimal? ? tables : table_group
477
+ ensure
478
+ warnings = @warnings.concat(input.warnings)
479
+ if validate? && !warnings.empty? && !@options[:warnings]
480
+ $stderr.puts "Warnings: #{warnings.join("\n")}"
418
481
  end
419
- when :Table
420
- table = nil
421
- Reader.open(input.url, options.merge(
422
- format: :tabular,
423
- metadata: input,
424
- base: input.url,
425
- minimal: minimal?,
426
- no_found_metadata: true
427
- )) do |r|
428
- table = r.to_hash(options)
482
+ if validate? && !errors.empty? && !@options[:errors]
483
+ $stderr.puts "Errors: #{errors.join("\n")}"
429
484
  end
430
-
431
- table
432
- else
433
- raise "Opened inappropriate metadata type: #{input.type}"
434
485
  end
435
486
  end
436
487
  else
@@ -443,6 +494,7 @@ module RDF::Tabular
443
494
 
444
495
  # Input is file containing CSV data.
445
496
  # Output ROW-Level statements
497
+ primary_keys = []
446
498
  metadata.each_row(input) do |row|
447
499
  if row.is_a?(RDF::Statement)
448
500
  # May add additional comments
@@ -450,14 +502,28 @@ module RDF::Tabular
450
502
  table['rdfs:comment'] << row.object.to_s
451
503
  next
452
504
  end
505
+
506
+ # Collect primary and foreign keys if validating
507
+ if validate?
508
+ primary_keys << row.primaryKey
509
+ collect_foreign_key_references(metadata, options[:fks_referencing_table], row)
510
+ end
511
+
453
512
  # Output row-level metadata
454
513
  r, a, values = {}, {}, {}
455
514
  r["url"] = row.id.to_s
456
515
  r["rownum"] = row.number
457
516
 
517
+ # Row titles
518
+ Array(row.titles).each { |t| merge_compacted_value(r, "titles", t.to_s) unless t.nil?}
519
+
458
520
  row.values.each_with_index do |cell, index|
459
521
  column = metadata.tableSchema.columns[index]
460
522
 
523
+ # Collect cell errors
524
+ (validate? ? errors : warnings) << "Table #{metadata.url} row #{row.number}(src #{row.sourceNumber}, col #{cell.column.sourceNumber}): " +
525
+ cell.errors.join("\n") unless Array(cell.errors).empty?
526
+
461
527
  # Ignore suppressed columns
462
528
  next if column.suppressOutput
463
529
 
@@ -472,7 +538,7 @@ module RDF::Tabular
472
538
  co['@id'] = subject.to_s unless subject == 'null'
473
539
  prop = case cell.propertyUrl
474
540
  when RDF.type then '@type'
475
- when nil then column.name
541
+ when nil then URI.decode(column.name) # Use URI-decoded name
476
542
  else
477
543
  # Compact the property to a term or prefixed name
478
544
  metadata.context.compact_iri(cell.propertyUrl, vocab: true)
@@ -487,8 +553,12 @@ module RDF::Tabular
487
553
  values[cell.valueUrl.to_s][:count] += 1
488
554
  end
489
555
  cell.valueUrl.to_s
556
+ when cell.value.is_a?(RDF::Literal::Double)
557
+ cell.value.object.nan? || cell.value.object.infinite? ? cell.value : cell.value.object
558
+ when cell.value.is_a?(RDF::Literal::Integer)
559
+ cell.value.object.to_i
490
560
  when cell.value.is_a?(RDF::Literal::Numeric)
491
- cell.value.object
561
+ cell.value.object.to_f
492
562
  when cell.value.is_a?(RDF::Literal::Boolean)
493
563
  cell.value.object
494
564
  when cell.value
@@ -520,6 +590,9 @@ module RDF::Tabular
520
590
  end
521
591
  end
522
592
 
593
+ # Validate primary keys
594
+ validate_primary_keys(metadata, primary_keys) if validate?
595
+
523
596
  # Use string values notes and common properties
524
597
  metadata.each do |key, value|
525
598
  next unless key.to_s.include?(':') || key == :notes
@@ -545,7 +618,7 @@ module RDF::Tabular
545
618
  table_group = input.to_atd
546
619
  if input.tables.empty? && options[:original_input]
547
620
  Reader.new(options[:original_input], options.merge(
548
- base: options.fetch(:base, "http://example.org/default-metadata"),
621
+ base: options[:base],
549
622
  no_found_metadata: true
550
623
  )) do |r|
551
624
  table_group["tables"] << r.to_atd(options)
@@ -621,6 +694,65 @@ module RDF::Tabular
621
694
  @callback.call(statement)
622
695
  end
623
696
 
697
+ # Validate primary keys
698
+ def validate_primary_keys(metadata, primary_keys)
699
+ pk_strings = {}
700
+ primary_keys.reject(&:empty?).each do |row_pks|
701
+ pk_names = row_pks.map {|cell| cell.value}.join(",")
702
+ errors << "Table #{metadata.url} has duplicate primary key #{pk_names}" if pk_strings.has_key?(pk_names)
703
+ pk_strings[pk_names] ||= 0
704
+ pk_strings[pk_names] += 1
705
+ end
706
+ end
707
+
708
+ # Collect foreign key references
709
+ # @param [Table] metadata
710
+ # @param [Array<Hash>] foreign_keys referencing this table
711
+ # @param [Row] row
712
+ def collect_foreign_key_references(metadata, foreign_keys, row)
713
+ schema = metadata.tableSchema
714
+
715
+ # Add row as foreignKey source
716
+ Array(schema ? schema.foreignKeys : []).each do |fk|
717
+ colRef = Array(fk['columnReference'])
718
+
719
+ # Referenced cells, in order
720
+ cells = colRef.map {|n| row.values.detect {|cell| cell.column.name == n}}.compact
721
+ cell_values = cells.map {|cell| cell.stringValue unless cell.stringValue.to_s.empty?}.compact
722
+ next if cell_values.empty? # Don't record if empty
723
+ (fk[:reference_from] ||= {})[cell_values] ||= row
724
+ end
725
+
726
+ # Add row as foreignKey dest
727
+ Array(foreign_keys).each do |fk|
728
+ colRef = Array(fk['reference']['columnReference'])
729
+
730
+ # Referenced cells, in order
731
+ cells = colRef.map {|n| row.values.detect {|cell| cell.column.name == n}}.compact
732
+ fk[:reference_to] ||= {}
733
+ cell_values = cells.map {|cell| cell.stringValue unless cell.stringValue.to_s.empty?}.compact
734
+ next if cell_values.empty? # Don't record if empty
735
+ errors << "Table #{metadata.url} row #{row.number}(src #{row.sourceNumber}): found duplicate foreign key target: #{cell_values.map(&:to_s).inspect}" if fk[:reference_to][cell_values]
736
+ fk[:reference_to][cell_values] ||= row
737
+ end
738
+ end
739
+
740
+ # Validate foreign keys
741
+ def validate_foreign_keys(metadata)
742
+ metadata.tables.each do |table|
743
+ next if (schema = table.tableSchema).nil?
744
+ schema.foreignKeys.each do |fk|
745
+ # Verify that reference_from entry exists in reference_to
746
+ fk.fetch(:reference_from, {}).each do |cell_values, row|
747
+ unless fk.fetch(:reference_to, {}).has_key?(cell_values)
748
+ errors << "Table #{table.url} row #{row.number}(src #{row.sourceNumber}): " +
749
+ "Foreign Key violation, expected to find #{cell_values.map(&:to_s).inspect}"
750
+ end
751
+ end
752
+ end if schema.foreignKeys
753
+ end
754
+ end
755
+
624
756
  # Merge values into compacted results, creating arrays if necessary
625
757
  def merge_compacted_value(hash, key, value)
626
758
  return unless hash