rdf-tabular 0.1.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/AUTHORS +1 -0
- data/README.md +73 -0
- data/UNLICENSE +24 -0
- data/VERSION +1 -0
- data/etc/csvw.jsonld +1507 -0
- data/etc/doap.csv +5 -0
- data/etc/doap.csv-metadata.json +34 -0
- data/etc/doap.ttl +35 -0
- data/lib/rdf/tabular.rb +34 -0
- data/lib/rdf/tabular/csvw.rb +477 -0
- data/lib/rdf/tabular/format.rb +46 -0
- data/lib/rdf/tabular/json.rb +0 -0
- data/lib/rdf/tabular/literal.rb +38 -0
- data/lib/rdf/tabular/metadata.rb +2038 -0
- data/lib/rdf/tabular/reader.rb +591 -0
- data/lib/rdf/tabular/utils.rb +33 -0
- data/lib/rdf/tabular/version.rb +18 -0
- data/spec/format_spec.rb +30 -0
- data/spec/matchers.rb +134 -0
- data/spec/metadata_spec.rb +1716 -0
- data/spec/reader_spec.rb +221 -0
- data/spec/spec_helper.rb +47 -0
- data/spec/suite_helper.rb +161 -0
- data/spec/suite_spec.rb +76 -0
- metadata +269 -0
@@ -0,0 +1,591 @@
|
|
1
|
+
require 'rdf'

module RDF::Tabular
  ##
  # A Tabular Data to RDF parser in Ruby.
  #
  # @author [Gregg Kellogg](http://greggkellogg.net/)
  class Reader < RDF::Reader
    # Register this reader with the Tabular format (defined in format.rb)
    format Format
    include Utils

    # Metadata associated with the CSV
    #
    # @return [Metadata]
    attr_reader :metadata

    ##
    # Input open to read
    # @return [IO, StringIO, Metadata] the opened input; replaced by the
    #   Metadata instance when metadata drives the reading (see #initialize)
    attr_reader :input
|
21
|
+
|
22
|
+
##
|
23
|
+
# Initializes the RDF::Tabular Reader instance.
|
24
|
+
#
|
25
|
+
# @param [Util::File::RemoteDoc, IO, StringIO, Array<Array<String>>] input
|
26
|
+
# An opened file possibly JSON Metadata,
|
27
|
+
# or an Array used as an internalized array of arrays
|
28
|
+
# @param [Hash{Symbol => Object}] options
|
29
|
+
# any additional options (see `RDF::Reader#initialize`)
|
30
|
+
# @option options [Metadata, Hash, String, RDF::URI] :metadata user supplied metadata, merged on top of extracted metadata. If provided as a URL, Metadata is loade from that location
|
31
|
+
# @option options [Boolean] :minimal includes only the information gleaned from the cells of the tabular data
|
32
|
+
# @option options [Boolean] :noProv do not output optional provenance information
|
33
|
+
# @yield [reader] `self`
|
34
|
+
# @yieldparam [RDF::Reader] reader
|
35
|
+
# @yieldreturn [void] ignored
|
36
|
+
# @raise [RDF::ReaderError] if the CSV document cannot be loaded
|
37
|
+
def initialize(input = $stdin, options = {}, &block)
|
38
|
+
super do
|
39
|
+
# Base would be how we are to take this
|
40
|
+
@options[:base] ||= base_uri.to_s if base_uri
|
41
|
+
@options[:base] ||= input.base_uri if input.respond_to?(:base_uri)
|
42
|
+
@options[:base] ||= input.path if input.respond_to?(:path)
|
43
|
+
@options[:base] ||= input.filename if input.respond_to?(:filename)
|
44
|
+
if RDF::URI(@options[:base]).relative? && File.exist?(@options[:base])
|
45
|
+
@options[:base] = "file:/#{File.expand_path(@options[:base])}"
|
46
|
+
end
|
47
|
+
|
48
|
+
@options[:depth] ||= 0
|
49
|
+
|
50
|
+
debug("Reader#initialize") {"input: #{input.inspect}, base: #{@options[:base]}"}
|
51
|
+
|
52
|
+
# Minimal implies noProv
|
53
|
+
@options[:noProv] ||= @options[:minimal]
|
54
|
+
|
55
|
+
@input = input.is_a?(String) ? StringIO.new(input) : input
|
56
|
+
|
57
|
+
depth do
|
58
|
+
# If input is JSON, then the input is the metadata
|
59
|
+
if @options[:base] =~ /\.json(?:ld)?$/ ||
|
60
|
+
@input.respond_to?(:content_type) && @input.content_type =~ %r(application/(?:ld+)json)
|
61
|
+
@metadata = Metadata.new(@input, @options.merge(filenames: @options[:base]))
|
62
|
+
# If @metadata is for a Table, merge with something empty to create a TableGroup metadata
|
63
|
+
if @metadata.is_a?(TableGroup)
|
64
|
+
@metadata.normalize!
|
65
|
+
else
|
66
|
+
@metadata = @metadata.merge(TableGroup.new({}))
|
67
|
+
end
|
68
|
+
@input = @metadata
|
69
|
+
elsif @options[:no_found_metadata]
|
70
|
+
# Extract embedded metadata and merge
|
71
|
+
table_metadata = @options[:metadata]
|
72
|
+
embedded_metadata = table_metadata.dialect.embedded_metadata(input, @options)
|
73
|
+
@metadata = table_metadata.dup.merge!(embedded_metadata)
|
74
|
+
else
|
75
|
+
# HTTP flags
|
76
|
+
if @input.respond_to?(:headers) &&
|
77
|
+
input.headers.fetch(:content_type, '').split(';').include?('header=absent')
|
78
|
+
@options[:metadata] ||= Table.new(url: @options[:base])
|
79
|
+
@options[:metadata].dialect.header = false
|
80
|
+
end
|
81
|
+
|
82
|
+
# It's tabluar data. Find metadata and proceed as if it was specified in the first place
|
83
|
+
@metadata = Metadata.for_input(@input, @options)
|
84
|
+
@input = @metadata
|
85
|
+
end
|
86
|
+
|
87
|
+
debug("Reader#initialize") {"input: #{input}, metadata: #{metadata.inspect}"}
|
88
|
+
|
89
|
+
if block_given?
|
90
|
+
case block.arity
|
91
|
+
when 0 then instance_eval(&block)
|
92
|
+
else block.call(self)
|
93
|
+
end
|
94
|
+
end
|
95
|
+
end
|
96
|
+
end
|
97
|
+
end
|
98
|
+
|
99
|
+
    ##
    # @private
    # @see RDF::Reader#each_statement
    def each_statement(&block)
      if block_given?
        @callback = block

        # Remembered so provenance can record when the activity started.
        start_time = Time.now

        # Construct metadata from that passed from file open, along with information from the file.
        if input.is_a?(Metadata)
          debug("each_statement: metadata") {input.inspect}

          # Validate metadata
          input.validate!

          depth do
            # Get Metadata to invoke and open referenced files
            case input.type
            when :TableGroup
              # Use resolved @id of TableGroup, if available
              table_group = input.id || RDF::Node.new
              add_statement(0, table_group, RDF.type, CSVW.TableGroup) unless minimal?

              # Common Properties (keys containing ':' or :notes)
              input.each do |key, value|
                next unless key.to_s.include?(':') || key == :notes
                input.common_properties(table_group, key, value) do |statement|
                  add_statement(0, statement)
                end
              end unless minimal?

              input.each_resource do |table|
                next if table.suppressOutput
                table_resource = table.id || RDF::Node.new
                add_statement(0, table_group, CSVW.table, table_resource) unless minimal?
                # Recursively open each referenced table; the nested reader
                # emits its statements through the same block.
                Reader.open(table.url, options.merge(
                  format: :tabular,
                  metadata: table,
                  base: table.url,
                  no_found_metadata: true,
                  table_resource: table_resource
                )) do |r|
                  r.each_statement(&block)
                end
              end

              # Provenance (skipped under :noProv / :minimal)
              if prov?
                activity = RDF::Node.new
                add_statement(0, table_group, RDF::PROV.wasGeneratedBy, activity)
                add_statement(0, activity, RDF.type, RDF::PROV.Activity)
                add_statement(0, activity, RDF::PROV.wasAssociatedWith, RDF::URI("http://rubygems.org/gems/rdf-tabular"))
                add_statement(0, activity, RDF::PROV.startedAtTime, RDF::Literal::DateTime.new(start_time))
                add_statement(0, activity, RDF::PROV.endedAtTime, RDF::Literal::DateTime.new(Time.now))

                # Record the CSV source URLs used
                unless (urls = input.resources.map(&:url)).empty?
                  usage = RDF::Node.new
                  add_statement(0, activity, RDF::PROV.qualifiedUsage, usage)
                  add_statement(0, usage, RDF.type, RDF::PROV.Usage)
                  urls.each do |url|
                    add_statement(0, usage, RDF::PROV.entity, RDF::URI(url))
                  end
                  add_statement(0, usage, RDF::PROV.hadRole, CSVW.csvEncodedTabularData)
                end

                # Record the metadata filenames used
                unless Array(input.filenames).empty?
                  usage = RDF::Node.new
                  add_statement(0, activity, RDF::PROV.qualifiedUsage, usage)
                  add_statement(0, usage, RDF.type, RDF::PROV.Usage)
                  Array(input.filenames).each do |fn|
                    add_statement(0, usage, RDF::PROV.entity, RDF::URI(fn))
                  end
                  add_statement(0, usage, RDF::PROV.hadRole, CSVW.tabularMetadata)
                end
              end
            when :Table
              Reader.open(input.url, options.merge(format: :tabular, metadata: input, base: input.url, no_found_metadata: true)) do |r|
                r.each_statement(&block)
              end
            else
              raise "Opened inappropriate metadata type: #{input.type}"
            end
          end
          return
        end

        # Output Table-Level RDF triples
        table_resource = options.fetch(:table_resource, (metadata.id || RDF::Node.new))
        unless minimal?
          add_statement(0, table_resource, RDF.type, CSVW.Table)
          add_statement(0, table_resource, CSVW.url, RDF::URI(metadata.url))
        end

        # Common Properties
        metadata.each do |key, value|
          next unless key.to_s.include?(':') || key == :notes
          metadata.common_properties(table_resource, key, value) do |statement|
            add_statement(0, statement)
          end
        end unless minimal?

        # Input is file containing CSV data.
        # Output ROW-Level statements
        last_row_num = 0
        metadata.each_row(input) do |row|
          if row.is_a?(RDF::Statement)
            # May add additional comments; attribute them to the table
            row.subject = table_resource
            add_statement(last_row_num + 1, row)
            next
          end
          last_row_num = row.sourceNumber

          # Output row-level metadata
          row_resource = RDF::Node.new
          default_cell_subject = RDF::Node.new
          unless minimal?
            add_statement(row.sourceNumber, table_resource, CSVW.row, row_resource)
            add_statement(row.sourceNumber, row_resource, CSVW.rownum, row.number)
            add_statement(row.sourceNumber, row_resource, CSVW.url, row.id)
          end
          row.values.each_with_index do |cell, index|
            next if cell.column.suppressOutput # Skip ignored cells
            cell_subject = cell.aboutUrl || default_cell_subject
            propertyUrl = cell.propertyUrl || RDF::URI("#{metadata.url}##{cell.column.name}")
            add_statement(row.sourceNumber, row_resource, CSVW.describes, cell_subject) unless minimal?

            if cell.column.valueUrl
              add_statement(row.sourceNumber, cell_subject, propertyUrl, cell.valueUrl) if cell.valueUrl
            elsif cell.column.ordered && cell.column.separator
              # Ordered, separated values become an rdf:List
              list = RDF::List[*Array(cell.value)]
              add_statement(row.sourceNumber, cell_subject, propertyUrl, list.subject)
              list.each_statement do |statement|
                # Suppress the redundant rdf:type rdf:List triple
                next if statement.predicate == RDF.type && statement.object == RDF.List
                add_statement(row.sourceNumber, statement.subject, statement.predicate, statement.object)
              end
            else
              Array(cell.value).each do |v|
                add_statement(row.sourceNumber, cell_subject, propertyUrl, v)
              end
            end
          end
        end
      end
      enum_for(:each_statement)
    end
|
246
|
+
|
247
|
+
##
|
248
|
+
# @private
|
249
|
+
# @see RDF::Reader#each_triple
|
250
|
+
def each_triple(&block)
|
251
|
+
if block_given?
|
252
|
+
each_statement do |statement|
|
253
|
+
block.call(*statement.to_triple)
|
254
|
+
end
|
255
|
+
end
|
256
|
+
enum_for(:each_triple)
|
257
|
+
end
|
258
|
+
|
259
|
+
    ##
    # Transform to JSON. Note that this must be run from within the reader context if the input is an open IO stream.
    #
    # @example outputing annotated CSV as JSON
    #   result = nil
    #   RDF::Tabular::Reader.open("etc/doap.csv") do |reader|
    #     result = reader.to_json
    #   end
    #   result #=> {...}
    #
    # @example outputing annotated CSV as JSON from an in-memory structure
    #   csv = %(
    #     GID,On Street,Species,Trim Cycle,Inventory Date
    #     1,ADDISON AV,Celtis australis,Large Tree Routine Prune,10/18/2010
    #     2,EMERSON ST,Liquidambar styraciflua,Large Tree Routine Prune,6/2/2010
    #     3,EMERSON ST,Liquidambar styraciflua,Large Tree Routine Prune,6/2/2010
    #   ).gsub(/^\s+/, '')
    #   r = RDF::Tabular::Reader.new(csv)
    #   r.to_json #=> {...}
    #
    # @param [Hash{Symbol => Object}] options may also be a JSON state
    # @option options [IO, StringIO] io to output to file
    # @option options [::JSON::State] :state used when dumping
    # @option options [Boolean] :atd output Abstract Table representation instead
    # @return [String]
    def to_json(options = {})
      # An IO-ish options argument selects streaming output
      io = case options
      when IO, StringIO then options
      when Hash then options[:io]
      end
      # Resolve the JSON generator state: explicit :state, an options hash
      # usable as a state (has :indent), a State instance, or the JSON-LD default.
      # NOTE(review): referencing ::JSON::Pure::Generator::State raises NameError
      # when the pure-Ruby JSON backend is not loaded — confirm both backends are
      # always defined under the json gem version this gem bundles.
      json_state = case options
      when Hash
        case
        when options.has_key?(:state) then options[:state]
        when options.has_key?(:indent) then options
        else ::JSON::LD::JSON_STATE
        end
      when ::JSON::State, ::JSON::Ext::Generator::State, ::JSON::Pure::Generator::State
        options
      else ::JSON::LD::JSON_STATE
      end
      options = {} unless options.is_a?(Hash)

      # Serialize either the annotated-table (:atd) or plain hash representation
      hash_fn = options[:atd] ? :to_atd : :to_hash
      options = options.merge(noProv: @options[:noProv])

      if io
        ::JSON::dump_default_options = json_state
        ::JSON.dump(self.send(hash_fn, options), io)
      else
        hash = self.send(hash_fn, options)
        ::JSON.generate(hash, json_state)
      end
    end
|
313
|
+
|
314
|
+
##
|
315
|
+
# Return a hash representation of the data for JSON serialization
|
316
|
+
#
|
317
|
+
# Produces an array if run in minimal mode.
|
318
|
+
#
|
319
|
+
# @param [Hash{Symbol => Object}] options
|
320
|
+
# @return [Hash, Array]
|
321
|
+
def to_hash(options = {})
|
322
|
+
# Construct metadata from that passed from file open, along with information from the file.
|
323
|
+
if input.is_a?(Metadata)
|
324
|
+
debug("each_statement: metadata") {input.inspect}
|
325
|
+
depth do
|
326
|
+
# Get Metadata to invoke and open referenced files
|
327
|
+
case input.type
|
328
|
+
when :TableGroup
|
329
|
+
# Validate metadata
|
330
|
+
input.validate!
|
331
|
+
|
332
|
+
tables = []
|
333
|
+
table_group = {}
|
334
|
+
table_group['@id'] = input.id.to_s if input.id
|
335
|
+
|
336
|
+
# Common Properties
|
337
|
+
input.each do |key, value|
|
338
|
+
next unless key.to_s.include?(':') || key == :notes
|
339
|
+
table_group[key] = input.common_properties(nil, key, value)
|
340
|
+
table_group[key] = [table_group[key]] if key == :notes && !table_group[key].is_a?(Array)
|
341
|
+
end
|
342
|
+
|
343
|
+
table_group['table'] = tables
|
344
|
+
|
345
|
+
input.each_resource do |table|
|
346
|
+
next if table.suppressOutput
|
347
|
+
Reader.open(table.url, options.merge(
|
348
|
+
format: :tabular,
|
349
|
+
metadata: table,
|
350
|
+
base: table.url,
|
351
|
+
minimal: minimal?,
|
352
|
+
no_found_metadata: true
|
353
|
+
)) do |r|
|
354
|
+
case table = r.to_hash(options)
|
355
|
+
when Array then tables += table
|
356
|
+
when Hash then tables << table
|
357
|
+
end
|
358
|
+
end
|
359
|
+
end
|
360
|
+
|
361
|
+
# Result is table_group or array
|
362
|
+
minimal? ? tables : table_group
|
363
|
+
when :Table
|
364
|
+
table = nil
|
365
|
+
Reader.open(input.url, options.merge(
|
366
|
+
format: :tabular,
|
367
|
+
metadata: input,
|
368
|
+
base: input.url,
|
369
|
+
minimal: minimal?,
|
370
|
+
no_found_metadata: true
|
371
|
+
)) do |r|
|
372
|
+
table = r.to_hash(options)
|
373
|
+
end
|
374
|
+
|
375
|
+
table
|
376
|
+
else
|
377
|
+
raise "Opened inappropriate metadata type: #{input.type}"
|
378
|
+
end
|
379
|
+
end
|
380
|
+
else
|
381
|
+
rows = []
|
382
|
+
table = {}
|
383
|
+
table['@id'] = metadata.id.to_s if metadata.id
|
384
|
+
table['url'] = metadata.url.to_s
|
385
|
+
|
386
|
+
# Use string values notes and common properties
|
387
|
+
metadata.each do |key, value|
|
388
|
+
next unless key.to_s.include?(':') || key == :notes
|
389
|
+
table[key] = metadata.common_properties(nil, key, value)
|
390
|
+
table[key] = [table[key]] if key == :notes && !table[key].is_a?(Array)
|
391
|
+
end unless minimal?
|
392
|
+
|
393
|
+
table.merge!("row" => rows)
|
394
|
+
|
395
|
+
# Input is file containing CSV data.
|
396
|
+
# Output ROW-Level statements
|
397
|
+
metadata.each_row(input) do |row|
|
398
|
+
if row.is_a?(RDF::Statement)
|
399
|
+
# May add additional comments
|
400
|
+
table['rdfs:comment'] ||= []
|
401
|
+
table['rdfs:comment'] << row.object.to_s
|
402
|
+
next
|
403
|
+
end
|
404
|
+
# Output row-level metadata
|
405
|
+
r, a, values = {}, {}, {}
|
406
|
+
r["url"] = row.id.to_s
|
407
|
+
r["rownum"] = row.number
|
408
|
+
|
409
|
+
row.values.each_with_index do |cell, index|
|
410
|
+
column = metadata.tableSchema.columns[index]
|
411
|
+
|
412
|
+
# Ignore suppressed columns
|
413
|
+
next if column.suppressOutput
|
414
|
+
|
415
|
+
# Skip valueUrl cells where the valueUrl is null
|
416
|
+
next if cell.column.valueUrl && cell.valueUrl.nil?
|
417
|
+
|
418
|
+
# Skip empty sequences
|
419
|
+
next if !cell.column.valueUrl && cell.value.is_a?(Array) && cell.value.empty?
|
420
|
+
|
421
|
+
subject = cell.aboutUrl || 'null'
|
422
|
+
co = (a[subject.to_s] ||= {})
|
423
|
+
co['@id'] = subject.to_s unless subject == 'null'
|
424
|
+
prop = case cell.propertyUrl
|
425
|
+
when RDF.type then '@type'
|
426
|
+
when nil then column.name
|
427
|
+
else
|
428
|
+
# Compact the property to a term or prefixed name
|
429
|
+
metadata.context.compact_iri(cell.propertyUrl, vocab: true)
|
430
|
+
end
|
431
|
+
|
432
|
+
value = case
|
433
|
+
when prop == '@type'
|
434
|
+
metadata.context.compact_iri(cell.valueUrl || cell.value, vocab: true)
|
435
|
+
when cell.valueUrl
|
436
|
+
unless subject == cell.valueUrl
|
437
|
+
values[cell.valueUrl.to_s] ||= {o: co, prop: prop, count: 0}
|
438
|
+
values[cell.valueUrl.to_s][:count] += 1
|
439
|
+
end
|
440
|
+
cell.valueUrl.to_s
|
441
|
+
when cell.value.is_a?(RDF::Literal::Numeric)
|
442
|
+
cell.value.object
|
443
|
+
when cell.value.is_a?(RDF::Literal::Boolean)
|
444
|
+
cell.value.object
|
445
|
+
else
|
446
|
+
cell.value
|
447
|
+
end
|
448
|
+
|
449
|
+
# Add or merge value
|
450
|
+
merge_compacted_value(co, prop, value)
|
451
|
+
end
|
452
|
+
|
453
|
+
# Check for nesting
|
454
|
+
values.keys.each do |valueUrl|
|
455
|
+
next unless a.has_key?(valueUrl)
|
456
|
+
ref = values[valueUrl]
|
457
|
+
co = ref[:o]
|
458
|
+
prop = ref[:prop]
|
459
|
+
next if ref[:count] != 1
|
460
|
+
raise "Expected #{ref[o][prop].inspect} to include #{valueUrl.inspect}" unless Array(co[prop]).include?(valueUrl)
|
461
|
+
co[prop] = Array(co[prop]).map {|e| e == valueUrl ? a.delete(valueUrl) : e}
|
462
|
+
co[prop] = co[prop].first if co[prop].length == 1
|
463
|
+
end
|
464
|
+
|
465
|
+
r["describes"] = a.values
|
466
|
+
|
467
|
+
if minimal?
|
468
|
+
rows.concat(r["describes"])
|
469
|
+
else
|
470
|
+
rows << r
|
471
|
+
end
|
472
|
+
end
|
473
|
+
|
474
|
+
minimal? ? table["row"] : table
|
475
|
+
end
|
476
|
+
end
|
477
|
+
|
478
|
+
    # Return a hash representation of the annotated tabular data model for JSON serialization
    # @param [Hash{Symbol => Object}] options
    # @return [Hash]
    def to_atd(options = {})
      # Construct metadata from that passed from file open, along with information from the file.
      if input.is_a?(Metadata)
        debug("each_statement: metadata") {input.inspect}
        depth do
          # Get Metadata to invoke and open referenced files
          case input.type
          when :TableGroup
            table_group = input.to_atd

            input.each_resource do |table|
              # Recursively render each referenced table without provenance
              Reader.open(table.url, options.merge(
                format: :tabular,
                metadata: table,
                base: table.url,
                no_found_metadata: true, # FIXME: remove
                noProv: true
              )) do |r|
                table = r.to_atd(options)

                # Fill in columns and rows in table_group entry from returned table
                t = table_group[:resources].detect {|tab| tab["url"] == table["url"]}
                t["columns"] = table["columns"]
                t["rows"] = table["rows"]
              end
            end

            # Result is table_group
            table_group
          when :Table
            table = nil
            Reader.open(input.url, options.merge(
              format: :tabular,
              metadata: input,
              base: input.url,
              no_found_metadata: true,
              noProv: true
            )) do |r|
              table = r.to_atd(options)
            end

            table
          else
            raise "Opened inappropriate metadata type: #{input.type}"
          end
        end
      else
        rows = []
        table = metadata.to_atd
        rows, columns = table["rows"], table["columns"]

        # Input is file containing CSV data.
        # Output ROW-Level statements
        metadata.each_row(input) do |row|
          rows << row.to_atd
          # Register each cell id with its column
          row.values.each_with_index do |cell, colndx|
            columns[colndx]["cells"] << cell.id
          end
        end
        table
      end
    end
|
543
|
+
|
544
|
+
def minimal?; @options[:minimal]; end
|
545
|
+
def prov?; !(@options[:noProv]); end
|
546
|
+
|
547
|
+
private
|
548
|
+
##
|
549
|
+
# @overload add_statement(lineno, statement)
|
550
|
+
# Add a statement, object can be literal or URI or bnode
|
551
|
+
# @param [String] lineno
|
552
|
+
# @param [RDF::Statement] statement
|
553
|
+
# @yield [RDF::Statement]
|
554
|
+
# @raise [ReaderError] Checks parameter types and raises if they are incorrect if parsing mode is _validate_.
|
555
|
+
#
|
556
|
+
# @overload add_statement(lineno, subject, predicate, object)
|
557
|
+
# Add a triple
|
558
|
+
# @param [URI, BNode] subject the subject of the statement
|
559
|
+
# @param [URI] predicate the predicate of the statement
|
560
|
+
# @param [URI, BNode, Literal] object the object of the statement
|
561
|
+
# @raise [ReaderError] Checks parameter types and raises if they are incorrect if parsing mode is _validate_.
|
562
|
+
def add_statement(node, *args)
|
563
|
+
statement = args[0].is_a?(RDF::Statement) ? args[0] : RDF::Statement.new(*args)
|
564
|
+
raise RDF::ReaderError, "#{statement.inspect} is invalid" if validate? && statement.invalid?
|
565
|
+
debug(node) {"statement: #{RDF::NTriples.serialize(statement)}".chomp}
|
566
|
+
@callback.call(statement)
|
567
|
+
end
|
568
|
+
|
569
|
+
# Merge values into compacted results, creating arrays if necessary
|
570
|
+
def merge_compacted_value(hash, key, value)
|
571
|
+
return unless hash
|
572
|
+
case hash[key]
|
573
|
+
when nil then hash[key] = value
|
574
|
+
when Array
|
575
|
+
if value.is_a?(Array)
|
576
|
+
hash[key].concat(value)
|
577
|
+
else
|
578
|
+
hash[key] << value
|
579
|
+
end
|
580
|
+
else
|
581
|
+
hash[key] = [hash[key]]
|
582
|
+
if value.is_a?(Array)
|
583
|
+
hash[key].concat(value)
|
584
|
+
else
|
585
|
+
hash[key] << value
|
586
|
+
end
|
587
|
+
end
|
588
|
+
end
|
589
|
+
end
|
590
|
+
end
|
591
|
+
|