rdf-tabular 0.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/AUTHORS +1 -0
- data/README.md +73 -0
- data/UNLICENSE +24 -0
- data/VERSION +1 -0
- data/etc/csvw.jsonld +1507 -0
- data/etc/doap.csv +5 -0
- data/etc/doap.csv-metadata.json +34 -0
- data/etc/doap.ttl +35 -0
- data/lib/rdf/tabular.rb +34 -0
- data/lib/rdf/tabular/csvw.rb +477 -0
- data/lib/rdf/tabular/format.rb +46 -0
- data/lib/rdf/tabular/json.rb +0 -0
- data/lib/rdf/tabular/literal.rb +38 -0
- data/lib/rdf/tabular/metadata.rb +2038 -0
- data/lib/rdf/tabular/reader.rb +591 -0
- data/lib/rdf/tabular/utils.rb +33 -0
- data/lib/rdf/tabular/version.rb +18 -0
- data/spec/format_spec.rb +30 -0
- data/spec/matchers.rb +134 -0
- data/spec/metadata_spec.rb +1716 -0
- data/spec/reader_spec.rb +221 -0
- data/spec/spec_helper.rb +47 -0
- data/spec/suite_helper.rb +161 -0
- data/spec/suite_spec.rb +76 -0
- metadata +269 -0
@@ -0,0 +1,591 @@
|
|
1
|
+
require 'rdf'
|
2
|
+
|
3
|
+
module RDF::Tabular
|
4
|
+
##
|
5
|
+
# A Tabular Data to RDF parser in Ruby.
|
6
|
+
#
|
7
|
+
# @author [Gregg Kellogg](http://greggkellogg.net/)
|
8
|
+
class Reader < RDF::Reader
|
9
|
+
format Format
|
10
|
+
include Utils
|
11
|
+
|
12
|
+
# Metadata associated with the CSV
|
13
|
+
#
|
14
|
+
# @return [Metadata]
|
15
|
+
attr_reader :metadata
|
16
|
+
|
17
|
+
##
|
18
|
+
# Input open to read
|
19
|
+
# @return [:read]
|
20
|
+
attr_reader :input
|
21
|
+
|
22
|
+
##
|
23
|
+
# Initializes the RDF::Tabular Reader instance.
|
24
|
+
#
|
25
|
+
# @param [Util::File::RemoteDoc, IO, StringIO, Array<Array<String>>] input
|
26
|
+
# An opened file possibly JSON Metadata,
|
27
|
+
# or an Array used as an internalized array of arrays
|
28
|
+
# @param [Hash{Symbol => Object}] options
|
29
|
+
# any additional options (see `RDF::Reader#initialize`)
|
30
|
+
# @option options [Metadata, Hash, String, RDF::URI] :metadata user supplied metadata, merged on top of extracted metadata. If provided as a URL, Metadata is loaded from that location
|
31
|
+
# @option options [Boolean] :minimal includes only the information gleaned from the cells of the tabular data
|
32
|
+
# @option options [Boolean] :noProv do not output optional provenance information
|
33
|
+
# @yield [reader] `self`
|
34
|
+
# @yieldparam [RDF::Reader] reader
|
35
|
+
# @yieldreturn [void] ignored
|
36
|
+
# @raise [RDF::ReaderError] if the CSV document cannot be loaded
|
37
|
+
def initialize(input = $stdin, options = {}, &block)
  super do
    # Establish a base IRI for the document: an explicit base_uri wins,
    # otherwise fall back to whatever the input object can tell us about
    # where it came from (remote doc, file path, upload filename).
    @options[:base] ||= base_uri.to_s if base_uri
    @options[:base] ||= input.base_uri if input.respond_to?(:base_uri)
    @options[:base] ||= input.path if input.respond_to?(:path)
    @options[:base] ||= input.filename if input.respond_to?(:filename)
    if RDF::URI(@options[:base]).relative? && File.exist?(@options[:base])
      @options[:base] = "file:/#{File.expand_path(@options[:base])}"
    end

    @options[:depth] ||= 0

    debug("Reader#initialize") {"input: #{input.inspect}, base: #{@options[:base]}"}

    # Minimal implies noProv
    @options[:noProv] ||= @options[:minimal]

    @input = input.is_a?(String) ? StringIO.new(input) : input

    depth do
      # If input is JSON, then the input is the metadata.
      # FIX: the media-type regexp was %r(application/(?:ld+)json), which
      # matches neither application/json nor application/ld+json (the '+'
      # was unescaped and the 'ld+' group was not optional); corrected to
      # accept both media types.
      if @options[:base] =~ /\.json(?:ld)?$/ ||
         @input.respond_to?(:content_type) && @input.content_type =~ %r(application/(?:ld\+)?json)
        @metadata = Metadata.new(@input, @options.merge(filenames: @options[:base]))
        # If @metadata is for a Table, merge with something empty to create a TableGroup metadata
        if @metadata.is_a?(TableGroup)
          @metadata.normalize!
        else
          @metadata = @metadata.merge(TableGroup.new({}))
        end
        @input = @metadata
      elsif @options[:no_found_metadata]
        # Extract embedded metadata and merge
        table_metadata = @options[:metadata]
        embedded_metadata = table_metadata.dialect.embedded_metadata(input, @options)
        @metadata = table_metadata.dup.merge!(embedded_metadata)
      else
        # HTTP flags: a Content-Type parameter of header=absent means the
        # CSV has no header row, so force dialect.header off.
        if @input.respond_to?(:headers) &&
           input.headers.fetch(:content_type, '').split(';').include?('header=absent')
          @options[:metadata] ||= Table.new(url: @options[:base])
          @options[:metadata].dialect.header = false
        end

        # It's tabular data. Find metadata and proceed as if it was specified in the first place
        @metadata = Metadata.for_input(@input, @options)
        @input = @metadata
      end

      debug("Reader#initialize") {"input: #{input}, metadata: #{metadata.inspect}"}

      if block_given?
        case block.arity
        when 0 then instance_eval(&block)
        else block.call(self)
        end
      end
    end
  end
end
|
98
|
+
|
99
|
+
##
|
100
|
+
# @private
|
101
|
+
# @see RDF::Reader#each_statement
|
102
|
+
def each_statement(&block)
  if block_given?
    @callback = block

    # Recorded so the PROV activity can report when processing began.
    start_time = Time.now

    # Construct metadata from that passed from file open, along with information from the file.
    if input.is_a?(Metadata)
      debug("each_statement: metadata") {input.inspect}

      # Validate metadata
      input.validate!

      depth do
        # Get Metadata to invoke and open referenced files
        case input.type
        when :TableGroup
          # Use resolved @id of TableGroup, if available
          table_group = input.id || RDF::Node.new
          add_statement(0, table_group, RDF.type, CSVW.TableGroup) unless minimal?

          # Common Properties: metadata keys containing ':' (e.g. dc:title)
          # plus :notes are emitted as RDF about the table-group node.
          input.each do |key, value|
            next unless key.to_s.include?(':') || key == :notes
            input.common_properties(table_group, key, value) do |statement|
              add_statement(0, statement)
            end
          end unless minimal?

          # Recursively open each referenced table and stream its
          # statements through the same callback.
          input.each_resource do |table|
            next if table.suppressOutput
            table_resource = table.id || RDF::Node.new
            add_statement(0, table_group, CSVW.table, table_resource) unless minimal?
            Reader.open(table.url, options.merge(
              format: :tabular,
              metadata: table,
              base: table.url,
              no_found_metadata: true,
              table_resource: table_resource
            )) do |r|
              r.each_statement(&block)
            end
          end

          # Provenance (suppressed by :noProv, which :minimal implies)
          if prov?
            activity = RDF::Node.new
            add_statement(0, table_group, RDF::PROV.wasGeneratedBy, activity)
            add_statement(0, activity, RDF.type, RDF::PROV.Activity)
            add_statement(0, activity, RDF::PROV.wasAssociatedWith, RDF::URI("http://rubygems.org/gems/rdf-tabular"))
            add_statement(0, activity, RDF::PROV.startedAtTime, RDF::Literal::DateTime.new(start_time))
            add_statement(0, activity, RDF::PROV.endedAtTime, RDF::Literal::DateTime.new(Time.now))

            # Qualified usage of the CSV files that were consumed
            unless (urls = input.resources.map(&:url)).empty?
              usage = RDF::Node.new
              add_statement(0, activity, RDF::PROV.qualifiedUsage, usage)
              add_statement(0, usage, RDF.type, RDF::PROV.Usage)
              urls.each do |url|
                add_statement(0, usage, RDF::PROV.entity, RDF::URI(url))
              end
              add_statement(0, usage, RDF::PROV.hadRole, CSVW.csvEncodedTabularData)
            end

            # Qualified usage of the metadata files that were consumed
            unless Array(input.filenames).empty?
              usage = RDF::Node.new
              add_statement(0, activity, RDF::PROV.qualifiedUsage, usage)
              add_statement(0, usage, RDF.type, RDF::PROV.Usage)
              Array(input.filenames).each do |fn|
                add_statement(0, usage, RDF::PROV.entity, RDF::URI(fn))
              end
              add_statement(0, usage, RDF::PROV.hadRole, CSVW.tabularMetadata)
            end
          end
        when :Table
          # A bare Table: re-open its URL with the table as metadata.
          Reader.open(input.url, options.merge(format: :tabular, metadata: input, base: input.url, no_found_metadata: true)) do |r|
            r.each_statement(&block)
          end
        else
          raise "Opened inappropriate metadata type: #{input.type}"
        end
      end
      return
    end

    # Reaching here means input is the CSV itself (opened recursively above).
    # Output Table-Level RDF triples
    table_resource = options.fetch(:table_resource, (metadata.id || RDF::Node.new))
    unless minimal?
      add_statement(0, table_resource, RDF.type, CSVW.Table)
      add_statement(0, table_resource, CSVW.url, RDF::URI(metadata.url))
    end

    # Common Properties
    metadata.each do |key, value|
      next unless key.to_s.include?(':') || key == :notes
      metadata.common_properties(table_resource, key, value) do |statement|
        add_statement(0, statement)
      end
    end unless minimal?

    # Input is file containing CSV data.
    # Output ROW-Level statements
    last_row_num = 0
    metadata.each_row(input) do |row|
      if row.is_a?(RDF::Statement)
        # May add additional comments
        row.subject = table_resource
        add_statement(last_row_num + 1, row)
        next
      end
      last_row_num = row.sourceNumber

      # Output row-level metadata
      row_resource = RDF::Node.new
      # Cells without an aboutUrl share one blank node per row.
      default_cell_subject = RDF::Node.new
      unless minimal?
        add_statement(row.sourceNumber, table_resource, CSVW.row, row_resource)
        add_statement(row.sourceNumber, row_resource, CSVW.rownum, row.number)
        add_statement(row.sourceNumber, row_resource, CSVW.url, row.id)
      end
      row.values.each_with_index do |cell, index|
        next if cell.column.suppressOutput # Skip ignored cells
        cell_subject = cell.aboutUrl || default_cell_subject
        # Default predicate is a fragment of the table URL named after the column.
        propertyUrl = cell.propertyUrl || RDF::URI("#{metadata.url}##{cell.column.name}")
        add_statement(row.sourceNumber, row_resource, CSVW.describes, cell_subject) unless minimal?

        if cell.column.valueUrl
          add_statement(row.sourceNumber, cell_subject, propertyUrl, cell.valueUrl) if cell.valueUrl
        elsif cell.column.ordered && cell.column.separator
          # Ordered, multi-valued cell: emit as an RDF Collection,
          # suppressing the redundant rdf:type rdf:List triples.
          list = RDF::List[*Array(cell.value)]
          add_statement(row.sourceNumber, cell_subject, propertyUrl, list.subject)
          list.each_statement do |statement|
            next if statement.predicate == RDF.type && statement.object == RDF.List
            add_statement(row.sourceNumber, statement.subject, statement.predicate, statement.object)
          end
        else
          # Scalar or unordered multi-valued cell: one triple per value.
          Array(cell.value).each do |v|
            add_statement(row.sourceNumber, cell_subject, propertyUrl, v)
          end
        end
      end
    end
  end
  enum_for(:each_statement)
end
|
246
|
+
|
247
|
+
##
|
248
|
+
# @private
|
249
|
+
# @see RDF::Reader#each_triple
|
250
|
+
# Yield each statement as a bare (subject, predicate, object) triple.
# Returns an Enumerator over the triples.
def each_triple(&block)
  each_statement { |st| yield(*st.to_triple) } if block_given?
  enum_for(:each_triple)
end
|
258
|
+
|
259
|
+
##
|
260
|
+
# Transform to JSON. Note that this must be run from within the reader context if the input is an open IO stream.
|
261
|
+
#
|
262
|
+
# @example outputing annotated CSV as JSON
|
263
|
+
# result = nil
|
264
|
+
# RDF::Tabular::Reader.open("etc/doap.csv") do |reader|
|
265
|
+
# result = reader.to_json
|
266
|
+
# end
|
267
|
+
# result #=> {...}
|
268
|
+
#
|
269
|
+
# @example outputing annotated CSV as JSON from an in-memory structure
|
270
|
+
# csv = %(
|
271
|
+
# GID,On Street,Species,Trim Cycle,Inventory Date
|
272
|
+
# 1,ADDISON AV,Celtis australis,Large Tree Routine Prune,10/18/2010
|
273
|
+
# 2,EMERSON ST,Liquidambar styraciflua,Large Tree Routine Prune,6/2/2010
|
274
|
+
# 3,EMERSON ST,Liquidambar styraciflua,Large Tree Routine Prune,6/2/2010
|
275
|
+
# ).gsub(/^\s+/, '')
|
276
|
+
# r = RDF::Tabular::Reader.new(csv)
|
277
|
+
# r.to_json #=> {...}
|
278
|
+
#
|
279
|
+
# @param [Hash{Symbol => Object}] options may also be a JSON state
|
280
|
+
# @option options [IO, StringIO] io to output to file
|
281
|
+
# @option options [::JSON::State] :state used when dumping
|
282
|
+
# @option options [Boolean] :atd output Abstract Table representation instead
|
283
|
+
# @return [String]
|
284
|
+
def to_json(options = {})
  # `options` may be a Hash, an IO-like target, or a JSON generator state.
  # Destination stream, when one was supplied.
  io = case options
  when IO, StringIO then options
  when Hash then options[:io]
  end
  # Resolve the JSON generator state: explicit :state, an :indent-style
  # options hash, a state object passed directly, or the JSON-LD default.
  # NOTE(review): referencing ::JSON::Pure::Generator::State raises
  # NameError when json/pure is not loaded — confirm on target rubies.
  json_state = case options
  when Hash
    case
    when options.has_key?(:state) then options[:state]
    when options.has_key?(:indent) then options
    else ::JSON::LD::JSON_STATE
    end
  when ::JSON::State, ::JSON::Ext::Generator::State, ::JSON::Pure::Generator::State
    options
  else ::JSON::LD::JSON_STATE
  end
  options = {} unless options.is_a?(Hash)

  # :atd selects the annotated tabular data model over the plain hash form.
  hash_fn = options[:atd] ? :to_atd : :to_hash
  options = options.merge(noProv: @options[:noProv])

  if io
    # NOTE(review): mutates process-wide JSON.dump_default_options as a
    # side effect of streaming output.
    ::JSON::dump_default_options = json_state
    ::JSON.dump(self.send(hash_fn, options), io)
  else
    hash = self.send(hash_fn, options)
    ::JSON.generate(hash, json_state)
  end
end
|
313
|
+
|
314
|
+
##
|
315
|
+
# Return a hash representation of the data for JSON serialization
|
316
|
+
#
|
317
|
+
# Produces an array if run in minimal mode.
|
318
|
+
#
|
319
|
+
# @param [Hash{Symbol => Object}] options
|
320
|
+
# @return [Hash, Array]
|
321
|
+
# Return a hash representation of the data for JSON serialization.
# Produces an array if run in minimal mode.
#
# @param [Hash{Symbol => Object}] options
# @return [Hash, Array]
def to_hash(options = {})
  # Construct metadata from that passed from file open, along with information from the file.
  if input.is_a?(Metadata)
    debug("each_statement: metadata") {input.inspect}
    depth do
      # Get Metadata to invoke and open referenced files
      case input.type
      when :TableGroup
        # Validate metadata
        input.validate!

        tables = []
        table_group = {}
        table_group['@id'] = input.id.to_s if input.id

        # Common Properties: keys containing ':' (e.g. dc:title) plus :notes
        input.each do |key, value|
          next unless key.to_s.include?(':') || key == :notes
          table_group[key] = input.common_properties(nil, key, value)
          table_group[key] = [table_group[key]] if key == :notes && !table_group[key].is_a?(Array)
        end

        table_group['table'] = tables

        # Recursively open each referenced table and collect its hash form.
        input.each_resource do |table|
          next if table.suppressOutput
          Reader.open(table.url, options.merge(
            format: :tabular,
            metadata: table,
            base: table.url,
            minimal: minimal?,
            no_found_metadata: true
          )) do |r|
            case table = r.to_hash(options)
            when Array then tables += table
            when Hash then tables << table
            end
          end
        end

        # Result is table_group or array
        minimal? ? tables : table_group
      when :Table
        table = nil
        Reader.open(input.url, options.merge(
          format: :tabular,
          metadata: input,
          base: input.url,
          minimal: minimal?,
          no_found_metadata: true
        )) do |r|
          table = r.to_hash(options)
        end

        table
      else
        raise "Opened inappropriate metadata type: #{input.type}"
      end
    end
  else
    rows = []
    table = {}
    table['@id'] = metadata.id.to_s if metadata.id
    table['url'] = metadata.url.to_s

    # Use string values notes and common properties
    metadata.each do |key, value|
      next unless key.to_s.include?(':') || key == :notes
      table[key] = metadata.common_properties(nil, key, value)
      table[key] = [table[key]] if key == :notes && !table[key].is_a?(Array)
    end unless minimal?

    table.merge!("row" => rows)

    # Input is file containing CSV data.
    # Output ROW-Level statements
    metadata.each_row(input) do |row|
      if row.is_a?(RDF::Statement)
        # May add additional comments
        table['rdfs:comment'] ||= []
        table['rdfs:comment'] << row.object.to_s
        next
      end
      # Output row-level metadata
      # r: the row entry; a: compacted objects keyed by subject;
      # values: valueUrl references used to detect nestable objects below.
      r, a, values = {}, {}, {}
      r["url"] = row.id.to_s
      r["rownum"] = row.number

      row.values.each_with_index do |cell, index|
        column = metadata.tableSchema.columns[index]

        # Ignore suppressed columns
        next if column.suppressOutput

        # Skip valueUrl cells where the valueUrl is null
        next if cell.column.valueUrl && cell.valueUrl.nil?

        # Skip empty sequences
        next if !cell.column.valueUrl && cell.value.is_a?(Array) && cell.value.empty?

        subject = cell.aboutUrl || 'null'
        co = (a[subject.to_s] ||= {})
        co['@id'] = subject.to_s unless subject == 'null'
        prop = case cell.propertyUrl
        when RDF.type then '@type'
        when nil then column.name
        else
          # Compact the property to a term or prefixed name
          metadata.context.compact_iri(cell.propertyUrl, vocab: true)
        end

        value = case
        when prop == '@type'
          metadata.context.compact_iri(cell.valueUrl || cell.value, vocab: true)
        when cell.valueUrl
          # Record the reference so single-use targets can be nested below.
          unless subject == cell.valueUrl
            values[cell.valueUrl.to_s] ||= {o: co, prop: prop, count: 0}
            values[cell.valueUrl.to_s][:count] += 1
          end
          cell.valueUrl.to_s
        when cell.value.is_a?(RDF::Literal::Numeric)
          cell.value.object
        when cell.value.is_a?(RDF::Literal::Boolean)
          cell.value.object
        else
          cell.value
        end

        # Add or merge value
        merge_compacted_value(co, prop, value)
      end

      # Check for nesting: a valueUrl referenced exactly once, which is also
      # a subject in this row, is inlined in place of its URL string.
      values.keys.each do |valueUrl|
        next unless a.has_key?(valueUrl)
        ref = values[valueUrl]
        co = ref[:o]
        prop = ref[:prop]
        next if ref[:count] != 1
        # FIX: the failure message referenced the undefined local `o`
        # (ref[o][prop]) which would raise NameError instead of the intended
        # diagnostic; ref[:o] is co, so report co[prop].
        raise "Expected #{co[prop].inspect} to include #{valueUrl.inspect}" unless Array(co[prop]).include?(valueUrl)
        co[prop] = Array(co[prop]).map {|e| e == valueUrl ? a.delete(valueUrl) : e}
        co[prop] = co[prop].first if co[prop].length == 1
      end

      r["describes"] = a.values

      if minimal?
        rows.concat(r["describes"])
      else
        rows << r
      end
    end

    minimal? ? table["row"] : table
  end
end
|
477
|
+
|
478
|
+
# Return a hash representation of the annotated tabular data model for JSON serialization
|
479
|
+
# @param [Hash{Symbol => Object}] options
|
480
|
+
# @return [Hash]
|
481
|
+
def to_atd(options = {})
  # Construct metadata from that passed from file open, along with information from the file.
  if input.is_a?(Metadata)
    debug("each_statement: metadata") {input.inspect}
    depth do
      # Get Metadata to invoke and open referenced files
      case input.type
      when :TableGroup
        # Skeleton ATD for the group; per-table detail is filled in below.
        table_group = input.to_atd

        input.each_resource do |table|
          Reader.open(table.url, options.merge(
            format: :tabular,
            metadata: table,
            base: table.url,
            no_found_metadata: true, # FIXME: remove
            noProv: true
          )) do |r|
            # NOTE(review): this reassigns the block parameter `table`,
            # shadowing the Metadata table for the rest of the block.
            table = r.to_atd(options)

            # Fill in columns and rows in table_group entry from returned table
            t = table_group[:resources].detect {|tab| tab["url"] == table["url"]}
            t["columns"] = table["columns"]
            t["rows"] = table["rows"]
          end
        end

        # Result is table_group
        table_group
      when :Table
        table = nil
        Reader.open(input.url, options.merge(
          format: :tabular,
          metadata: input,
          base: input.url,
          no_found_metadata: true,
          noProv: true
        )) do |r|
          table = r.to_atd(options)
        end

        table
      else
        raise "Opened inappropriate metadata type: #{input.type}"
      end
    end
  else
    # Input is the CSV itself: build the table ATD and append row/cell data.
    rows = []
    table = metadata.to_atd
    rows, columns = table["rows"], table["columns"]

    # Input is file containing CSV data.
    # Output ROW-Level statements
    metadata.each_row(input) do |row|
      rows << row.to_atd
      # Register each cell id with its column entry.
      row.values.each_with_index do |cell, colndx|
        columns[colndx]["cells"] << cell.id
      end
    end
    table
  end
end
|
543
|
+
|
544
|
+
# True when the reader was opened in minimal mode (cell data only).
def minimal?
  @options[:minimal]
end
|
545
|
+
# True unless provenance output was suppressed via :noProv.
def prov?
  !@options[:noProv]
end
|
546
|
+
|
547
|
+
private
|
548
|
+
##
|
549
|
+
# @overload add_statement(lineno, statement)
|
550
|
+
# Add a statement, object can be literal or URI or bnode
|
551
|
+
# @param [String] lineno
|
552
|
+
# @param [RDF::Statement] statement
|
553
|
+
# @yield [RDF::Statement]
|
554
|
+
# @raise [ReaderError] Checks parameter types and raises if they are incorrect if parsing mode is _validate_.
|
555
|
+
#
|
556
|
+
# @overload add_statement(lineno, subject, predicate, object)
|
557
|
+
# Add a triple
|
558
|
+
# @param [URI, BNode] subject the subject of the statement
|
559
|
+
# @param [URI] predicate the predicate of the statement
|
560
|
+
# @param [URI, BNode, Literal] object the object of the statement
|
561
|
+
# @raise [ReaderError] Checks parameter types and raises if they are incorrect if parsing mode is _validate_.
|
562
|
+
# Emit one statement through the callback registered by #each_statement.
# Accepts either a ready-made RDF::Statement or (subject, predicate,
# object) components; validates first when in validation mode.
def add_statement(node, *args)
  statement = args.first
  statement = RDF::Statement.new(*args) unless statement.is_a?(RDF::Statement)
  if validate? && statement.invalid?
    raise RDF::ReaderError, "#{statement.inspect} is invalid"
  end
  debug(node) { "statement: #{RDF::NTriples.serialize(statement)}".chomp }
  @callback.call(statement)
end
|
568
|
+
|
569
|
+
# Merge values into compacted results, creating arrays if necessary
|
570
|
+
# Merge values into compacted results, creating arrays if necessary.
# FIX: folds the two duplicated Array/scalar append branches into one
# code path; behavior is unchanged (an existing array is mutated in
# place, a scalar is promoted to an array, value arrays are spliced).
#
# @param [Hash, nil] hash compacted object to merge into (no-op when nil)
# @param [String, Symbol] key property name
# @param [Object, Array] value value(s) to add under key
def merge_compacted_value(hash, key, value)
  return unless hash
  current = hash[key]
  hash[key] =
    if current.nil?
      value
    else
      # Note: value may itself be a Hash, so only splice genuine Arrays.
      target = current.is_a?(Array) ? current : [current]
      value.is_a?(Array) ? target.concat(value) : target << value
    end
end
|
589
|
+
end
|
590
|
+
end
|
591
|
+
|