rdf-tabular 0.1.3.1 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -19,6 +19,16 @@ module RDF::Tabular
19
19
  # @return [:read]
20
20
  attr_reader :input
21
21
 
22
+ ##
23
+ # Warnings found during processing
24
+ # @return [Array<String>]
25
+ attr_reader :warnings
26
+
27
+ ##
28
+ # Accumulated errors found during processing
29
+ # @return [Array<String>]
30
+ attr_reader :errors
31
+
22
32
  ##
23
33
  # Initializes the RDF::Tabular Reader instance.
24
34
  #
@@ -32,6 +42,8 @@ module RDF::Tabular
32
42
  # @option options [Boolean] :noProv do not output optional provenance information
33
43
  # @option options [Array] :warnings
34
44
  # array for placing warnings found when processing metadata. If not set, and validating, warnings are output to `$stderr`
45
+ # @option options [Array<Hash>] :fks_referencing_table
46
+ # When called with Table metadata, a list of the foreign keys referencing this table
35
47
  # @yield [reader] `self`
36
48
  # @yieldparam [RDF::Reader] reader
37
49
  # @yieldreturn [void] ignored
@@ -48,13 +60,14 @@ module RDF::Tabular
48
60
  end
49
61
 
50
62
  @options[:depth] ||= 0
63
+ @errors = @options.fetch(:errors, [])
64
+ @warnings = @options.fetch(:warnings, [])
51
65
 
52
66
  debug("Reader#initialize") {"input: #{input.inspect}, base: #{@options[:base]}"}
53
67
 
54
68
  # Minimal implies noProv
55
69
  @options[:noProv] ||= @options[:minimal]
56
70
 
57
- #byebug if input.is_a?(Array)
58
71
  @input = case input
59
72
  when String then StringIO.new(input)
60
73
  when Array then StringIO.new(input.map {|r| r.join(",")}.join("\n"))
@@ -63,8 +76,8 @@ module RDF::Tabular
63
76
 
64
77
  depth do
65
78
  # If input is JSON, then the input is the metadata
66
- if @options[:base] =~ /\.json(?:ld)?$/ ||
67
- @input.respond_to?(:content_type) && @input.content_type =~ %r(application/(?:ld+)json)
79
+ content_type = @input.respond_to?(:content_type) ? @input.content_type : ""
80
+ if @options[:base] =~ /\.json(?:ld)?$/ || content_type =~ %r(application/(csvm\+|ld\+)?json)
68
81
  @metadata = Metadata.new(@input, @options.merge(filenames: @options[:base]))
69
82
  # If @metadata is for a Table, turn it into a TableGroup
70
83
  @metadata = @metadata.to_table_group if @metadata.is_a?(Table)
@@ -79,7 +92,8 @@ module RDF::Tabular
79
92
  dialect.header = false if (input.headers.fetch(:content_type, '').split(';').include?('header=absent') rescue false)
80
93
  dialect.encoding = input.charset if (input.charset rescue nil)
81
94
  dialect.separator = "\t" if (input.content_type == "text/tsv" rescue nil)
82
- embed_options = {base: "http://example.org/default-metadata"}.merge(@options)
95
+ embed_options = @options.dup
96
+ embed_options[:lang] = dialect_metadata.lang if dialect_metadata.lang
83
97
  embedded_metadata = dialect.embedded_metadata(input, @options[:metadata], embed_options)
84
98
 
85
99
  if (@metadata = @options[:metadata]) && @metadata.tableSchema
@@ -96,7 +110,7 @@ module RDF::Tabular
96
110
  @metadata.dialect = dialect
97
111
  else
98
112
  # It's tabular data. Find metadata and proceed as if it was specified in the first place
99
- @options[:original_input] = @input
113
+ @options[:original_input] = @input unless @options[:metadata]
100
114
  @input = @metadata = Metadata.for_input(@input, @options).normalize!
101
115
  end
102
116
 
@@ -126,93 +140,102 @@ module RDF::Tabular
126
140
  debug("each_statement: metadata") {input.inspect}
127
141
 
128
142
  depth do
129
- # Get Metadata to invoke and open referenced files
130
- case input.type
131
- when :TableGroup
132
- begin
133
- # Validate metadata
134
- input.validate!
135
-
136
- # Use resolved @id of TableGroup, if available
137
- table_group = input.id || RDF::Node.new
138
- add_statement(0, table_group, RDF.type, CSVW.TableGroup) unless minimal?
139
-
140
- # Common Properties
141
- input.each do |key, value|
142
- next unless key.to_s.include?(':') || key == :notes
143
- input.common_properties(table_group, key, value) do |statement|
144
- add_statement(0, statement)
145
- end
146
- end unless minimal?
143
+ begin
144
+ # Validate metadata
145
+ input.validate!
146
+
147
+ # Use resolved @id of TableGroup, if available
148
+ table_group = input.id || RDF::Node.new
149
+ add_statement(0, table_group, RDF.type, CSVW.TableGroup) unless minimal?
150
+
151
+ # Common Properties
152
+ input.each do |key, value|
153
+ next unless key.to_s.include?(':') || key == :notes
154
+ input.common_properties(table_group, key, value) do |statement|
155
+ add_statement(0, statement)
156
+ end
157
+ end unless minimal?
147
158
 
148
- # If we were originally given tabular data as input, simply use that, rather than opening the table URL. This allows buffered data to be used as input
149
- if Array(input.tables).empty? && options[:original_input]
150
- table_resource = RDF::Node.new
159
+ # If we were originally given tabular data as input, simply use that, rather than opening the table URL. This allows buffered data to be used as input.
160
+ # This case also handles found metadata that doesn't describe the input file
161
+ if options[:original_input] && !input.describes_file?(options[:base_uri])
162
+ table_resource = RDF::Node.new
163
+ add_statement(0, table_group, CSVW.table, table_resource) unless minimal?
164
+ Reader.new(options[:original_input], options.merge(
165
+ metadata: input.tables.first,
166
+ base: input.tables.first.url,
167
+ no_found_metadata: true,
168
+ table_resource: table_resource,
169
+ warnings: @warnings,
170
+ errors: @errors,
171
+ )) do |r|
172
+ r.each_statement(&block)
173
+ end
174
+ else
175
+ input.each_table do |table|
176
+ # If validating, continue on to process value restrictions
177
+ next if table.suppressOutput && !validate?
178
+
179
+ # Foreign Keys referencing this table
180
+ fks = input.tables.map do |t|
181
+ t.tableSchema && t.tableSchema.foreign_keys_referencing(table)
182
+ end.flatten.compact
183
+ table_resource = table.id || RDF::Node.new
151
184
  add_statement(0, table_group, CSVW.table, table_resource) unless minimal?
152
- Reader.new(options[:original_input], options.merge(
185
+ Reader.open(table.url, options.merge(
186
+ metadata: table,
187
+ base: table.url,
153
188
  no_found_metadata: true,
154
- table_resource: table_resource
189
+ table_resource: table_resource,
190
+ fks_referencing_table: fks,
191
+ warnings: @warnings,
192
+ errors: @errors,
155
193
  )) do |r|
156
194
  r.each_statement(&block)
157
195
  end
158
- else
159
- input.each_table do |table|
160
- next if table.suppressOutput
161
- table_resource = table.id || RDF::Node.new
162
- add_statement(0, table_group, CSVW.table, table_resource) unless minimal?
163
- Reader.open(table.url, options.merge(
164
- format: :tabular,
165
- metadata: table,
166
- base: table.url,
167
- no_found_metadata: true,
168
- table_resource: table_resource
169
- )) do |r|
170
- r.each_statement(&block)
171
- end
172
- end
173
196
  end
174
197
 
175
- # Provenance
176
- if prov?
177
- activity = RDF::Node.new
178
- add_statement(0, table_group, RDF::PROV.wasGeneratedBy, activity)
179
- add_statement(0, activity, RDF.type, RDF::PROV.Activity)
180
- add_statement(0, activity, RDF::PROV.wasAssociatedWith, RDF::URI("http://rubygems.org/gems/rdf-tabular"))
181
- add_statement(0, activity, RDF::PROV.startedAtTime, RDF::Literal::DateTime.new(start_time))
182
- add_statement(0, activity, RDF::PROV.endedAtTime, RDF::Literal::DateTime.new(Time.now))
183
-
184
- unless (urls = input.tables.map(&:url)).empty?
185
- usage = RDF::Node.new
186
- add_statement(0, activity, RDF::PROV.qualifiedUsage, usage)
187
- add_statement(0, usage, RDF.type, RDF::PROV.Usage)
188
- urls.each do |url|
189
- add_statement(0, usage, RDF::PROV.entity, RDF::URI(url))
190
- end
191
- add_statement(0, usage, RDF::PROV.hadRole, CSVW.csvEncodedTabularData)
192
- end
198
+ # Lastly, if validating, validate foreign key integrity
199
+ validate_foreign_keys(input) if validate?
200
+ end
193
201
 
194
- unless Array(input.filenames).empty?
195
- usage = RDF::Node.new
196
- add_statement(0, activity, RDF::PROV.qualifiedUsage, usage)
197
- add_statement(0, usage, RDF.type, RDF::PROV.Usage)
198
- Array(input.filenames).each do |fn|
199
- add_statement(0, usage, RDF::PROV.entity, RDF::URI(fn))
200
- end
201
- add_statement(0, usage, RDF::PROV.hadRole, CSVW.tabularMetadata)
202
+ # Provenance
203
+ if prov?
204
+ activity = RDF::Node.new
205
+ add_statement(0, table_group, RDF::PROV.wasGeneratedBy, activity)
206
+ add_statement(0, activity, RDF.type, RDF::PROV.Activity)
207
+ add_statement(0, activity, RDF::PROV.wasAssociatedWith, RDF::URI("http://rubygems.org/gems/rdf-tabular"))
208
+ add_statement(0, activity, RDF::PROV.startedAtTime, RDF::Literal::DateTime.new(start_time))
209
+ add_statement(0, activity, RDF::PROV.endedAtTime, RDF::Literal::DateTime.new(Time.now))
210
+
211
+ unless (urls = input.tables.map(&:url)).empty?
212
+ usage = RDF::Node.new
213
+ add_statement(0, activity, RDF::PROV.qualifiedUsage, usage)
214
+ add_statement(0, usage, RDF.type, RDF::PROV.Usage)
215
+ urls.each do |url|
216
+ add_statement(0, usage, RDF::PROV.entity, RDF::URI(url))
202
217
  end
218
+ add_statement(0, usage, RDF::PROV.hadRole, CSVW.csvEncodedTabularData)
203
219
  end
204
- ensure
205
- warnings = @options.fetch(:warnings, []).concat(input.warnings)
206
- if validate? && !warnings.empty? && !@options[:warnings]
207
- $stderr.puts "Warnings: #{warnings.join("\n")}"
220
+
221
+ unless Array(input.filenames).empty?
222
+ usage = RDF::Node.new
223
+ add_statement(0, activity, RDF::PROV.qualifiedUsage, usage)
224
+ add_statement(0, usage, RDF.type, RDF::PROV.Usage)
225
+ Array(input.filenames).each do |fn|
226
+ add_statement(0, usage, RDF::PROV.entity, RDF::URI(fn))
227
+ end
228
+ add_statement(0, usage, RDF::PROV.hadRole, CSVW.tabularMetadata)
208
229
  end
209
230
  end
210
- when :Table
211
- Reader.open(input.url, options.merge(format: :tabular, metadata: input, base: input.url, no_found_metadata: true)) do |r|
212
- r.each_statement(&block)
231
+ ensure
232
+ warnings = @warnings.concat(input.warnings)
233
+ if validate? && !warnings.empty? && !@options[:warnings]
234
+ $stderr.puts "Warnings: #{warnings.join("\n")}"
235
+ end
236
+ if validate? && !errors.empty? && !@options[:errors]
237
+ $stderr.puts "Errors: #{errors.join("\n")}"
213
238
  end
214
- else
215
- raise "Opened inappropriate metadata type: #{input.type}"
216
239
  end
217
240
  end
218
241
  return
@@ -220,7 +243,7 @@ module RDF::Tabular
220
243
 
221
244
  # Output Table-Level RDF triples
222
245
  table_resource = options.fetch(:table_resource, (metadata.id || RDF::Node.new))
223
- unless minimal?
246
+ unless minimal? || metadata.suppressOutput
224
247
  add_statement(0, table_resource, RDF.type, CSVW.Table)
225
248
  add_statement(0, table_resource, CSVW.url, RDF::URI(metadata.url))
226
249
  end
@@ -228,15 +251,24 @@ module RDF::Tabular
228
251
  # Input is file containing CSV data.
229
252
  # Output ROW-Level statements
230
253
  last_row_num = 0
254
+ primary_keys = []
231
255
  metadata.each_row(input) do |row|
232
256
  if row.is_a?(RDF::Statement)
233
257
  # May add additional comments
234
258
  row.subject = table_resource
235
- add_statement(last_row_num + 1, row)
259
+ add_statement(last_row_num + 1, row) unless metadata.suppressOutput
236
260
  next
237
261
  end
238
262
  last_row_num = row.sourceNumber
239
263
 
264
+ # Collect primary and foreign keys if validating
265
+ if validate?
266
+ primary_keys << row.primaryKey
267
+ collect_foreign_key_references(metadata, options[:fks_referencing_table], row)
268
+ end
269
+
270
+ next if metadata.suppressOutput
271
+
240
272
  # Output row-level metadata
241
273
  row_resource = RDF::Node.new
242
274
  default_cell_subject = RDF::Node.new
@@ -245,8 +277,14 @@ module RDF::Tabular
245
277
  add_statement(row.sourceNumber, row_resource, CSVW.rownum, row.number)
246
278
  add_statement(row.sourceNumber, row_resource, RDF.type, CSVW.Row)
247
279
  add_statement(row.sourceNumber, row_resource, CSVW.url, row.id)
280
+ row.titles.each do |t|
281
+ add_statement(row.sourceNumber, row_resource, CSVW.title, t)
282
+ end
248
283
  end
249
284
  row.values.each_with_index do |cell, index|
285
+ # Collect cell errors
286
+ (validate? ? errors : warnings) << "Table #{metadata.url} row #{row.number}(src #{row.sourceNumber}, col #{cell.column.sourceNumber}): " +
287
+ cell.errors.join("\n") unless Array(cell.errors).empty?
250
288
  next if cell.column.suppressOutput # Skip ignored cells
251
289
  cell_subject = cell.aboutUrl || default_cell_subject
252
290
  propertyUrl = cell.propertyUrl || RDF::URI("#{metadata.url}##{cell.column.name}")
@@ -269,6 +307,9 @@ module RDF::Tabular
269
307
  end
270
308
  end
271
309
 
310
+ # Validate primary keys
311
+ validate_primary_keys(metadata, primary_keys) if validate?
312
+
272
313
  # Common Properties
273
314
  metadata.each do |key, value|
274
315
  next unless key.to_s.include?(':') || key == :notes
@@ -278,6 +319,8 @@ module RDF::Tabular
278
319
  end unless minimal?
279
320
  end
280
321
  enum_for(:each_statement)
322
+ rescue IOError => e
323
+ raise RDF::ReaderError, e.message, e.backtrace
281
324
  end
282
325
 
283
326
  ##
@@ -292,6 +335,19 @@ module RDF::Tabular
292
335
  enum_for(:each_triple)
293
336
  end
294
337
 
338
+ ##
339
+ # Validate and raise an exception if any errors are found while processing either metadata or tables
340
+ # @return [self]
341
+ # @raise [Error]
342
+ def validate!
343
+ each_statement {} # Read all rows
344
+ raise Error, errors.join("\n") unless errors.empty?
345
+ self
346
+ rescue RDF::ReaderError => e
347
+ raise Error, e.message
348
+ self
349
+ end
350
+
295
351
  ##
296
352
  # Transform to JSON. Note that this must be run from within the reader context if the input is an open IO stream.
297
353
  #
@@ -317,6 +373,7 @@ module RDF::Tabular
317
373
  # @option options [::JSON::State] :state used when dumping
318
374
  # @option options [Boolean] :atd output Abstract Table representation instead
319
375
  # @return [String]
376
+ # @raise [RDF::Tabular::Error]
320
377
  def to_json(options = @options)
321
378
  io = case options
322
379
  when IO, StringIO then options
@@ -345,6 +402,8 @@ module RDF::Tabular
345
402
  hash = self.send(hash_fn, options)
346
403
  ::JSON.generate(hash, json_state)
347
404
  end
405
+ rescue IOError => e
406
+ raise RDF::Tabular::Error, e.message
348
407
  end
349
408
 
350
409
  ##
@@ -360,77 +419,69 @@ module RDF::Tabular
360
419
  debug("each_statement: metadata") {input.inspect}
361
420
  depth do
362
421
  # Get Metadata to invoke and open referenced files
363
- case input.type
364
- when :TableGroup
365
- begin
366
- # Validate metadata
367
- input.validate!
422
+ begin
423
+ # Validate metadata
424
+ input.validate!
425
+
426
+ tables = []
427
+ table_group = {}
428
+ table_group['@id'] = input.id.to_s if input.id
429
+
430
+ # Common Properties
431
+ input.each do |key, value|
432
+ next unless key.to_s.include?(':') || key == :notes
433
+ table_group[key] = input.common_properties(nil, key, value)
434
+ table_group[key] = [table_group[key]] if key == :notes && !table_group[key].is_a?(Array)
435
+ end
368
436
 
369
- tables = []
370
- table_group = {}
371
- table_group['@id'] = input.id.to_s if input.id
437
+ table_group['tables'] = tables
372
438
 
373
- # Common Properties
374
- input.each do |key, value|
375
- next unless key.to_s.include?(':') || key == :notes
376
- table_group[key] = input.common_properties(nil, key, value)
377
- table_group[key] = [table_group[key]] if key == :notes && !table_group[key].is_a?(Array)
439
+ if options[:original_input] && !input.describes_file?(options[:base_uri])
440
+ Reader.new(options[:original_input], options.merge(
441
+ metadata: input.tables.first,
442
+ base: input.tables.first.url,
443
+ minimal: minimal?,
444
+ no_found_metadata: true,
445
+ warnings: @warnings,
446
+ errors: @errors,
447
+ )) do |r|
448
+ case t = r.to_hash(options)
449
+ when Array then tables += t unless input.tables.first.suppressOutput
450
+ when Hash then tables << t unless input.tables.first.suppressOutput
451
+ end
378
452
  end
379
-
380
- table_group['table'] = tables
381
-
382
- if input.tables.empty? && options[:original_input]
383
- Reader.new(options[:original_input], options.merge(
384
- base: options.fetch(:base, "http://example.org/default-metadata"),
385
- minimal: minimal?,
386
- no_found_metadata: true
453
+ else
454
+ input.each_table do |table|
455
+ next if table.suppressOutput && !validate?
456
+ Reader.open(table.url, options.merge(
457
+ metadata: table,
458
+ base: table.url,
459
+ minimal: minimal?,
460
+ no_found_metadata: true,
461
+ warnings: @warnings,
462
+ errors: @errors,
387
463
  )) do |r|
388
- case table = r.to_hash(options)
389
- when Array then tables += table
390
- when Hash then tables << table
391
- end
392
- end
393
- else
394
- input.each_table do |table|
395
- next if table.suppressOutput
396
- Reader.open(table.url, options.merge(
397
- format: :tabular,
398
- metadata: table,
399
- base: table.url,
400
- minimal: minimal?,
401
- no_found_metadata: true
402
- )) do |r|
403
- case table = r.to_hash(options)
404
- when Array then tables += table
405
- when Hash then tables << table
406
- end
464
+ case t = r.to_hash(options)
465
+ when Array then tables += t unless table.suppressOutput
466
+ when Hash then tables << t unless table.suppressOutput
407
467
  end
408
468
  end
409
469
  end
470
+ end
410
471
 
411
- # Result is table_group or array
412
- minimal? ? tables : table_group
413
- ensure
414
- warnings = options.fetch(:warnings, []).concat(input.warnings)
415
- if validate? && !warnings.empty? && !@options[:warnings]
416
- $stderr.puts "Warnings: #{warnings.join("\n")}"
417
- end
472
+ # Lastly, if validating, validate foreign key integrity
473
+ validate_foreign_keys(input) if validate?
474
+
475
+ # Result is table_group or array
476
+ minimal? ? tables : table_group
477
+ ensure
478
+ warnings = @warnings.concat(input.warnings)
479
+ if validate? && !warnings.empty? && !@options[:warnings]
480
+ $stderr.puts "Warnings: #{warnings.join("\n")}"
418
481
  end
419
- when :Table
420
- table = nil
421
- Reader.open(input.url, options.merge(
422
- format: :tabular,
423
- metadata: input,
424
- base: input.url,
425
- minimal: minimal?,
426
- no_found_metadata: true
427
- )) do |r|
428
- table = r.to_hash(options)
482
+ if validate? && !errors.empty? && !@options[:errors]
483
+ $stderr.puts "Errors: #{errors.join("\n")}"
429
484
  end
430
-
431
- table
432
- else
433
- raise "Opened inappropriate metadata type: #{input.type}"
434
485
  end
435
486
  end
436
487
  else
@@ -443,6 +494,7 @@ module RDF::Tabular
443
494
 
444
495
  # Input is file containing CSV data.
445
496
  # Output ROW-Level statements
497
+ primary_keys = []
446
498
  metadata.each_row(input) do |row|
447
499
  if row.is_a?(RDF::Statement)
448
500
  # May add additional comments
@@ -450,14 +502,28 @@ module RDF::Tabular
450
502
  table['rdfs:comment'] << row.object.to_s
451
503
  next
452
504
  end
505
+
506
+ # Collect primary and foreign keys if validating
507
+ if validate?
508
+ primary_keys << row.primaryKey
509
+ collect_foreign_key_references(metadata, options[:fks_referencing_table], row)
510
+ end
511
+
453
512
  # Output row-level metadata
454
513
  r, a, values = {}, {}, {}
455
514
  r["url"] = row.id.to_s
456
515
  r["rownum"] = row.number
457
516
 
517
+ # Row titles
518
+ Array(row.titles).each { |t| merge_compacted_value(r, "titles", t.to_s) unless t.nil?}
519
+
458
520
  row.values.each_with_index do |cell, index|
459
521
  column = metadata.tableSchema.columns[index]
460
522
 
523
+ # Collect cell errors
524
+ (validate? ? errors : warnings) << "Table #{metadata.url} row #{row.number}(src #{row.sourceNumber}, col #{cell.column.sourceNumber}): " +
525
+ cell.errors.join("\n") unless Array(cell.errors).empty?
526
+
461
527
  # Ignore suppressed columns
462
528
  next if column.suppressOutput
463
529
 
@@ -472,7 +538,7 @@ module RDF::Tabular
472
538
  co['@id'] = subject.to_s unless subject == 'null'
473
539
  prop = case cell.propertyUrl
474
540
  when RDF.type then '@type'
475
- when nil then column.name
541
+ when nil then URI.decode(column.name) # Use URI-decoded name
476
542
  else
477
543
  # Compact the property to a term or prefixed name
478
544
  metadata.context.compact_iri(cell.propertyUrl, vocab: true)
@@ -487,8 +553,12 @@ module RDF::Tabular
487
553
  values[cell.valueUrl.to_s][:count] += 1
488
554
  end
489
555
  cell.valueUrl.to_s
556
+ when cell.value.is_a?(RDF::Literal::Double)
557
+ cell.value.object.nan? || cell.value.object.infinite? ? cell.value : cell.value.object
558
+ when cell.value.is_a?(RDF::Literal::Integer)
559
+ cell.value.object.to_i
490
560
  when cell.value.is_a?(RDF::Literal::Numeric)
491
- cell.value.object
561
+ cell.value.object.to_f
492
562
  when cell.value.is_a?(RDF::Literal::Boolean)
493
563
  cell.value.object
494
564
  when cell.value
@@ -520,6 +590,9 @@ module RDF::Tabular
520
590
  end
521
591
  end
522
592
 
593
+ # Validate primary keys
594
+ validate_primary_keys(metadata, primary_keys) if validate?
595
+
523
596
  # Use string values notes and common properties
524
597
  metadata.each do |key, value|
525
598
  next unless key.to_s.include?(':') || key == :notes
@@ -545,7 +618,7 @@ module RDF::Tabular
545
618
  table_group = input.to_atd
546
619
  if input.tables.empty? && options[:original_input]
547
620
  Reader.new(options[:original_input], options.merge(
548
- base: options.fetch(:base, "http://example.org/default-metadata"),
621
+ base: options[:base],
549
622
  no_found_metadata: true
550
623
  )) do |r|
551
624
  table_group["tables"] << r.to_atd(options)
@@ -621,6 +694,65 @@ module RDF::Tabular
621
694
  @callback.call(statement)
622
695
  end
623
696
 
697
+ # Validate primary keys
698
+ def validate_primary_keys(metadata, primary_keys)
699
+ pk_strings = {}
700
+ primary_keys.reject(&:empty?).each do |row_pks|
701
+ pk_names = row_pks.map {|cell| cell.value}.join(",")
702
+ errors << "Table #{metadata.url} has duplicate primary key #{pk_names}" if pk_strings.has_key?(pk_names)
703
+ pk_strings[pk_names] ||= 0
704
+ pk_strings[pk_names] += 1
705
+ end
706
+ end
707
+
708
+ # Collect foreign key references
709
+ # @param [Table] metadata
710
+ # @param [Array<Hash>] foreign_keys referencing this table
711
+ # @param [Row] row
712
+ def collect_foreign_key_references(metadata, foreign_keys, row)
713
+ schema = metadata.tableSchema
714
+
715
+ # Add row as foreignKey source
716
+ Array(schema ? schema.foreignKeys : []).each do |fk|
717
+ colRef = Array(fk['columnReference'])
718
+
719
+ # Referenced cells, in order
720
+ cells = colRef.map {|n| row.values.detect {|cell| cell.column.name == n}}.compact
721
+ cell_values = cells.map {|cell| cell.stringValue unless cell.stringValue.to_s.empty?}.compact
722
+ next if cell_values.empty? # Don't record if empty
723
+ (fk[:reference_from] ||= {})[cell_values] ||= row
724
+ end
725
+
726
+ # Add row as foreignKey dest
727
+ Array(foreign_keys).each do |fk|
728
+ colRef = Array(fk['reference']['columnReference'])
729
+
730
+ # Referenced cells, in order
731
+ cells = colRef.map {|n| row.values.detect {|cell| cell.column.name == n}}.compact
732
+ fk[:reference_to] ||= {}
733
+ cell_values = cells.map {|cell| cell.stringValue unless cell.stringValue.to_s.empty?}.compact
734
+ next if cell_values.empty? # Don't record if empty
735
+ errors << "Table #{metadata.url} row #{row.number}(src #{row.sourceNumber}): found duplicate foreign key target: #{cell_values.map(&:to_s).inspect}" if fk[:reference_to][cell_values]
736
+ fk[:reference_to][cell_values] ||= row
737
+ end
738
+ end
739
+
740
+ # Validate foreign keys
741
+ def validate_foreign_keys(metadata)
742
+ metadata.tables.each do |table|
743
+ next if (schema = table.tableSchema).nil?
744
+ schema.foreignKeys.each do |fk|
745
+ # Verify that reference_from entry exists in reference_to
746
+ fk.fetch(:reference_from, {}).each do |cell_values, row|
747
+ unless fk.fetch(:reference_to, {}).has_key?(cell_values)
748
+ errors << "Table #{table.url} row #{row.number}(src #{row.sourceNumber}): " +
749
+ "Foreign Key violation, expected to find #{cell_values.map(&:to_s).inspect}"
750
+ end
751
+ end
752
+ end if schema.foreignKeys
753
+ end
754
+ end
755
+
624
756
  # Merge values into compacted results, creating arrays if necessary
625
757
  def merge_compacted_value(hash, key, value)
626
758
  return unless hash