rdf-tabular 0.1.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,46 @@
1
module RDF::Tabular
  ##
  # Tabular Data/CSV format specification.
  #
  # @example Obtaining a Tabular format class
  #   RDF::Format.for(:tabular)    #=> RDF::Tabular::Format
  #   RDF::Format.for(:csv)        #=> RDF::Tabular::Format
  #   RDF::Format.for(:tsv)        #=> RDF::Tabular::Format
  #   RDF::Format.for("etc/foaf.csv")
  #   RDF::Format.for("etc/foaf.tsv")
  #   RDF::Format.for(:file_name      => "etc/foaf.csv")
  #   RDF::Format.for(:file_name      => "etc/foaf.tsv")
  #   RDF::Format.for(:file_extension => "csv")
  #   RDF::Format.for(:file_extension => "tsv")
  #   RDF::Format.for(:content_type   => "text/csv")
  #   RDF::Format.for(:content_type   => "text/tab-separated-values")
  #
  # @example Obtaining serialization format MIME types
  #   RDF::Format.content_types #=> {"text/csv" => [RDF::Tabular::Format]}
  #
  # @example Obtaining serialization format file extension mappings
  #   RDF::Format.file_extensions #=> {:csv => "text/csv"}
  #
  # @see http://www.w3.org/TR/tabular-data-model/
  class Format < RDF::Format
    content_type 'text/csv',
      extensions: [:csv, :tsv],
      alias: 'text/tab-separated-values'
    content_encoding 'utf-8'

    reader { RDF::Tabular::Reader }

    ##
    # Sample detection to see if it matches CSV/TSV tabular data.
    #
    # Use a text sample to detect the format of an input file. Sub-classes implement
    # a matcher sufficient to detect probable format matches, including disambiguating
    # between other similar formats.
    #
    # NOTE(review): this regex looks garbled by extraction — as written it requires
    # word-characters followed by literal spaces and would not match typical CSV
    # samples such as "a,b". Confirm against the upstream gem before relying on it.
    #
    # @param [String] sample Beginning several bytes (~ 1K) of input.
    # @return [Boolean]
    def self.detect(sample)
      !!sample.match(/^(?:(?:\w )+,(?:\w ))$/)
    end
  end
end
File without changes
@@ -0,0 +1,38 @@
1
# CSVW-specific literal classes

require 'rdf'
require 'rdf/xsd'

module RDF::Tabular
  ##
  # A JSON literal: wraps either a lexical JSON string or an already-parsed
  # Ruby object, converting between the two lazily.
  class JSON < RDF::Literal
    DATATYPE = RDF::Tabular::CSVW.json
    GRAMMAR  = nil

    ##
    # @param [Object] value
    #   either a lexical JSON String or a parsed object
    # @option options [String] :lexical (nil)
    #   explicit lexical form, overriding a String value
    def initialize(value, options = {})
      @datatype = options[:datatype] || DATATYPE
      @string = options[:lexical] if options.key?(:lexical)
      case value
      when String then @string ||= value
      else             @object = value
      end
    end

    ##
    # Parse value, if necessary; the result is memoized.
    #
    # @return [Object]
    def object
      @object ||= ::JSON.parse(value)
    end

    # Lexical (JSON) serialization of the value; memoized on first use.
    def to_s
      @string ||= value.to_json
    end
  end
end
@@ -0,0 +1,2038 @@
1
+ require 'json'
2
+ require 'json/ld'
3
+ require 'bcp47'
4
+ require 'addressable/template'
5
+ require 'rdf/xsd'
6
+
7
+ ##
8
+ # CSVM Metadata processor
9
+ #
10
+ # * Extracts Metadata from file or Hash definition
11
+ # * Merges multiple Metadata definitions
12
+ # * Extract Metadata from a CSV file
13
+ # * Return table-level annotations
14
+ # * Return Column-level annotations
15
+ # * Return row iterator with column information
16
+ #
17
+ # @author [Gregg Kellogg](http://greggkellogg.net/)
18
+ module RDF::Tabular
19
+ class Metadata
20
+ include Utils
21
+
22
+ # Hash representation
23
+ attr_accessor :object
24
+
25
+ # Inheritect properties, valid for all types
26
+ INHERITED_PROPERTIES = {
27
+ null: :atomic,
28
+ lang: :atomic,
29
+ textDirection: :atomic,
30
+ separator: :atomic,
31
+ default: :atomic,
32
+ ordered: :atomic,
33
+ datatype: :atomic,
34
+ aboutUrl: :uri_template,
35
+ propertyUrl: :uri_template,
36
+ valueUrl: :uri_template,
37
+ }.freeze
38
+
39
+ # Valid datatypes
40
+ DATATYPES = {
41
+ anyAtomicType: RDF::XSD.anySimpleType,
42
+ anyURI: RDF::XSD.anyURI,
43
+ base64Binary: RDF::XSD.basee65Binary,
44
+ boolean: RDF::XSD.boolean,
45
+ byte: RDF::XSD.byte,
46
+ date: RDF::XSD.date,
47
+ dateTime: RDF::XSD.dateTime,
48
+ dateTimeDuration: RDF::XSD.dateTimeDuration,
49
+ dateTimeStamp: RDF::XSD.dateTimeStamp,
50
+ decimal: RDF::XSD.decimal,
51
+ double: RDF::XSD.double,
52
+ float: RDF::XSD.float,
53
+ ENTITY: RDF::XSD.ENTITY,
54
+ gDay: RDF::XSD.gDay,
55
+ gMonth: RDF::XSD.gMonth,
56
+ gMonthDay: RDF::XSD.gMonthDay,
57
+ gYear: RDF::XSD.gYear,
58
+ gYearMonth: RDF::XSD.gYearMonth,
59
+ hexBinary: RDF::XSD.hexBinary,
60
+ int: RDF::XSD.int,
61
+ integer: RDF::XSD.integer,
62
+ language: RDF::XSD.language,
63
+ long: RDF::XSD.long,
64
+ Name: RDF::XSD.Name,
65
+ NCName: RDF::XSD.NCName,
66
+ negativeInteger: RDF::XSD.negativeInteger,
67
+ nonNegativeInteger: RDF::XSD.nonNegativeInteger,
68
+ nonPositiveInteger: RDF::XSD.nonPositiveInteger,
69
+ normalizedString: RDF::XSD.normalizedString,
70
+ NOTATION: RDF::XSD.NOTATION,
71
+ positiveInteger: RDF::XSD.positiveInteger,
72
+ QName: RDF::XSD.Qname,
73
+ short: RDF::XSD.short,
74
+ string: RDF::XSD.string,
75
+ time: RDF::XSD.time,
76
+ token: RDF::XSD.token,
77
+ unsignedByte: RDF::XSD.unsignedByte,
78
+ unsignedInt: RDF::XSD.unsignedInt,
79
+ unsignedLong: RDF::XSD.unsignedLong,
80
+ unsignedShort: RDF::XSD.unsignedShort,
81
+ yearMonthDuration: RDF::XSD.yearMonthDuration,
82
+
83
+ any: RDF::XSD.anySimpleType,
84
+ binary: RDF::XSD.base64Binary,
85
+ datetime: RDF::XSD.dateTime,
86
+ html: RDF.HTML,
87
+ json: RDF::Tabular::CSVW.JSON,
88
+ number: RDF::XSD.double,
89
+ xml: RDF.XMLLiteral,
90
+ }
91
+
92
+ # A name is restricted according to the following RegExp.
93
+ # @return [RegExp]
94
+ NAME_SYNTAX = %r(\A(?:_col|[a-zA-Z0-9]|%\h\h)([a-zA-Z0-9\._]|%\h\h)*\z)
95
+
96
+ # Local version of the context
97
+ # @return [JSON::LD::Context]
98
+ LOCAL_CONTEXT = ::JSON::LD::Context.new.parse(File.expand_path("../../../../etc/csvw.jsonld", __FILE__))
99
+
100
+ # ID of this Metadata
101
+ # @return [RDF::URI]
102
+ attr_reader :id
103
+
104
+ # URL of related resource
105
+ # @return [RDF::URI]
106
+ attr_reader :url
107
+
108
+ # Parent of this Metadata (TableGroup for Table, ...)
109
+ # @return [Metadata]
110
+ attr_reader :parent
111
+
112
+ # Filename(s) (URI) of opened metadata, if any
113
+ # May be plural when merged
114
+ # @return [Array<RDF::URI>] filenames
115
+ attr_reader :filenames
116
+
117
+ ##
118
+ # Attempt to retrieve the file at the specified path. If it is valid metadata, create a new Metadata object from it, otherwise, an empty Metadata object
119
+ #
120
+ # @param [String] path
121
+ # @param [Hash{Symbol => Object}] options
122
+ # see `RDF::Util::File.open_file` in RDF.rb
123
+ def self.open(path, options = {})
124
+ options = options.merge(
125
+ headers: {
126
+ 'Accept' => 'application/ld+json, application/json'
127
+ }
128
+ )
129
+ path = "file:" + path unless path =~ /^\w+:/
130
+ RDF::Util::File.open_file(path, options) do |file|
131
+ self.new(file, options.merge(base: path, filenames: path))
132
+ end
133
+ end
134
+
135
    ##
    # Return metadata for a file, based on user-specified and path-relative locations from an input file
    # @param [IO, StringIO] input
    # @param [Hash{Symbol => Object}] options
    # @option options [Metadata, Hash, String, RDF::URI] :metadata user supplied metadata, merged on top of extracted metadata. If provided as a URL, Metadata is loaded from that location
    # @option options [RDF::URI] :base
    #   The Base URL to use when expanding the document. This overrides the value of `input` if it is a URL. If not specified and `input` is not an URL, the base URL defaults to the current document URL if in a browser context, or the empty string if there is no document context.
    # @return [Metadata]
    def self.for_input(input, options = {})
      base = options[:base]

      # Use user metadata: accept an existing Metadata instance as-is,
      # construct from a Hash, or load from a String/URI location.
      user_metadata = case options[:metadata]
      when Metadata then options[:metadata]
      when Hash
        Metadata.new(options[:metadata], options.merge(reason: "load user metadata: #{options[:metadata].inspect}"))
      when String, RDF::URI
        Metadata.open(options[:metadata], options.merge(filenames: options[:metadata], reason: "load user metadata: #{options[:metadata].inspect}"))
      end

      found_metadata = nil

      # If user_metadata does not describe input, get the first found from linked-, file-, and directory-specific metadata
      unless user_metadata.is_a?(Table) || user_metadata.is_a?(TableGroup) && user_metadata.for_table(base)
        # load link metadata, if available
        # NOTE(review): the assignment in the condition is intentional — `link`
        # is nil (falsy) when no describedby link exists.
        locs = []
        if input.respond_to?(:links) &&
          link = input.links.find_link(%w(rel describedby))
          locs << RDF::URI(base).join(link.href)
        end

        # Fall back to file-specific ("<base>-metadata.json") and
        # directory-level ("metadata.json") conventions.
        if base
          locs += [RDF::URI("#{base}-metadata.json"), RDF::URI(base).join("metadata.json")]
        end

        # First location that loads successfully wins; load failures are
        # logged and ignored (best-effort discovery).
        locs.each do |loc|
          found_metadata ||= begin
            Metadata.open(loc, options.merge(filenames: loc, reason: "load found metadata: #{loc}"))
          rescue
            debug("for_input", options) {"failed to load found metadata #{loc}: #{$!}"}
            nil
          end
        end
      end

      # Return either the merge of user- and found-metadata, any of these, or an empty TableGroup
      metadata = case
      when user_metadata && found_metadata then user_metadata.merge(found_metadata)
      when user_metadata then user_metadata
      when found_metadata then found_metadata
      else TableGroup.new({resources: [{url: base}]}, options)
      end

      # Make TableGroup, if not already
      metadata.is_a?(TableGroup) ? metadata : metadata.merge(TableGroup.new({}))
    end
191
+
192
+ ##
193
+ # @private
194
+ def self.new(input, options = {})
195
+ # Triveal case
196
+ return input if input.is_a?(Metadata)
197
+
198
+ object = case input
199
+ when Hash then input
200
+ when IO, StringIO then ::JSON.parse(input.read)
201
+ else ::JSON.parse(input.to_s)
202
+ end
203
+
204
+ unless options[:parent]
205
+ # Add context, if not set (which it should be)
206
+ object['@context'] ||= options.delete(:@context) || options[:context] || 'http://www.w3.org/ns/csvw'
207
+ end
208
+
209
+ klass = case
210
+ when !self.equal?(RDF::Tabular::Metadata)
211
+ self # subclasses can be directly constructed without type dispatch
212
+ else
213
+ type = if options[:type]
214
+ type = options[:type].to_sym
215
+ raise Error, "If provided, type must be one of :TableGroup, :Table, :Transformation, :Schema, :Column, :Dialect]" unless
216
+ [:TableGroup, :Table, :Transformation, :Schema, :Column, :Dialect].include?(type)
217
+ type
218
+ end
219
+
220
+ # Figure out type by @type
221
+ type ||= object['@type']
222
+
223
+ # Figure out type by site
224
+ object_keys = object.keys.map(&:to_s)
225
+ type ||= case
226
+ when %w(resources).any? {|k| object_keys.include?(k)} then :TableGroup
227
+ when %w(dialect tableSchema transformations).any? {|k| object_keys.include?(k)} then :Table
228
+ when %w(targetFormat scriptFormat source).any? {|k| object_keys.include?(k)} then :Transformation
229
+ when %w(columns primaryKey foreignKeys urlTemplate).any? {|k| object_keys.include?(k)} then :Schema
230
+ when %w(name required).any? {|k| object_keys.include?(k)} then :Column
231
+ when %w(commentPrefix delimiter doubleQuote encoding header headerColumnCount headerRowCount).any? {|k| object_keys.include?(k)} then :Dialect
232
+ when %w(lineTerminator quoteChar skipBlankRows skipColumns skipInitialSpace skipRows trim).any? {|k| object_keys.include?(k)} then :Dialect
233
+ end
234
+
235
+ case type.to_s.to_sym
236
+ when :TableGroup then RDF::Tabular::TableGroup
237
+ when :Table then RDF::Tabular::Table
238
+ when :Transformation then RDF::Tabular::Transformation
239
+ when :Schema then RDF::Tabular::Schema
240
+ when :Column then RDF::Tabular::Column
241
+ when :Dialect then RDF::Tabular::Dialect
242
+ else
243
+ raise Error, "Unkown metadata type: #{type.inspect}"
244
+ end
245
+ end
246
+
247
+ md = klass.allocate
248
+ md.send(:initialize, object, options)
249
+ md
250
+ end
251
+
252
    ##
    # Create Metadata from IO, Hash or String
    #
    # @param [Metadata, Hash, #read] input
    # @param [Hash{Symbol => Object}] options
    # @option options [:TableGroup, :Table, :Transformation, :Schema, :Column, :Dialect] :type
    #   Type of schema, if not set, intuited from properties
    # @option options [JSON::LD::Context] context
    #   Context used for this metadata. Taken from input if not provided
    # @option options [RDF::URI] :base
    #   The Base URL to use when expanding the document. This overrides the value of `input` if it is a URL. If not specified and `input` is not an URL, the base URL defaults to the current document URL if in a browser context, or the empty string if there is no document context.
    # @raise [Error]
    # @return [Metadata]
    def initialize(input, options = {})
      @options = options.dup

      # Get context from input
      # Optimize by using built-in version of context, and just extract @base, @lang
      @context = case input['@context']
      when Array then LOCAL_CONTEXT.parse(input['@context'].detect {|e| e.is_a?(Hash)} || {})
      when Hash then LOCAL_CONTEXT.parse(input['@context'])
      when nil then nil
      else LOCAL_CONTEXT
      end

      reason = @options.delete(:reason)

      # Base resolution order: explicit option, context @base, then
      # whatever the input object can report about its own location.
      @options[:base] ||= @context.base if @context
      @options[:base] ||= input.base_uri if input.respond_to?(:base_uri)
      @options[:base] ||= input.filename if input.respond_to?(:filename)
      @options[:base] = RDF::URI(@options[:base])

      @context.base = @options[:base] if @context

      @options[:depth] ||= 0
      @filenames = Array(@options[:filenames]).map {|fn| RDF::URI(fn)} if @options[:filenames]
      # PROPERTIES/REQUIRED are declared per-subclass.
      @properties = self.class.const_get(:PROPERTIES)
      @required = self.class.const_get(:REQUIRED)

      @object = {}

      # Parent of this Metadata, if any
      @parent = @options[:parent]

      depth do
        # Input was parsed in .new
        # Metadata is object with symbolic keys
        input.each do |key, value|
          key = key.to_sym
          case key
          when :columns
            # An array of column descriptions; each becomes a Column child.
            # Columns are numbered in order of appearance, starting at 1.
            object[key] = if value.is_a?(Array) && value.all? {|v| v.is_a?(Hash)}
              number = 0
              value.map do |v|
                number += 1
                Column.new(v, @options.merge(table: (parent if parent.is_a?(Table)), parent: self, context: nil, number: number))
              end
            else
              # Invalid, but preserve value
              value
            end
          when :datatype
            # If in object form, normalize keys to symbols
            object[key] = case value
            when Hash
              value.inject({}) {|memo, (k,v)| memo[k.to_sym] = v; memo}
            else
              value
            end
          when :dialect
            # If provided, dialect provides hints to processors about how to parse the referenced file to create a tabular data model.
            # A String is treated as a URL relative to base and loaded.
            object[key] = case value
            when String then Dialect.open(base.join(value), @options.merge(parent: self, context: nil))
            when Hash then Dialect.new(value, @options.merge(parent: self, context: nil))
            else
              # Invalid, but preserve value
              value
            end
            @type ||= :Table
          when :resources
            # An array of table descriptions for the tables in the group.
            object[key] = if value.is_a?(Array) && value.all? {|v| v.is_a?(Hash)}
              value.map {|v| Table.new(v, @options.merge(parent: self, context: nil))}
            else
              # Invalid, but preserve value
              value
            end
          when :tableSchema
            # An object property that provides a schema description as described in section 3.8 Schemas, for all the tables in the group. This may be provided as an embedded object within the JSON metadata or as a URL reference to a separate JSON schema document
            # SPEC SUGGESTION: when loading a remote schema, assign @id from its location if not already set
            object[key] = case value
            when String
              link = base.join(value).to_s
              s = Schema.open(link, @options.merge(parent: self, context: nil))
              s[:@id] ||= link
              s
            when Hash then Schema.new(value, @options.merge(parent: self, context: nil))
            else
              # Invalid, but preserve value
              value
            end
          when :transformations
            # An array of template specifications that provide mechanisms to transform the tabular data into other formats
            object[key] = if value.is_a?(Array) && value.all? {|v| v.is_a?(Hash)}
              value.map {|v| Transformation.new(v, @options.merge(parent: self, context: nil))}
            else
              # Invalid, but preserve value
              value
            end
          when :url
            # URL of CSV relative to metadata
            object[:url] = value
            @url = base.join(value)
            @context.base = @url if @context # Use as base for expanding IRIs
          when :@id
            # metadata identifier
            object[:@id] = value
            @id = base.join(value)
          else
            # Known properties go through their setters; anything else is
            # preserved verbatim (and caught later by validate!).
            if @properties.has_key?(key)
              self.send("#{key}=".to_sym, value)
            else
              object[key] = value
            end
          end
        end
      end

      # Set type from @type, if present and not otherwise defined
      @type ||= object[:@type].to_sym if object[:@type]
      if reason
        debug("md#initialize") {reason}
        debug("md#initialize") {"filenames: #{filenames}"}
        debug("md#initialize") {"#{inspect}, parent: #{!@parent.nil?}, context: #{!@context.nil?}"} unless is_a?(Dialect)
      end
    end
389
+
390
+ # Setters
391
+ INHERITED_PROPERTIES.keys.each do |a|
392
+ define_method("#{a}=".to_sym) do |value|
393
+ object[a] = value.to_s =~ /^\d+/ ? value.to_i : value
394
+ end
395
+ end
396
+
397
+ # Context used for this metadata. Use parent's if not defined on self.
398
+ # @return [JSON::LD::Context]
399
+ def context
400
+ @context || (parent.context if parent)
401
+ end
402
+
403
    # Treat `dialect` similar to an inherited property, but merge together values from Table and TableGroup
    # @return [Dialect]
    def dialect
      @dialect ||= case
      when object[:dialect] then object[:dialect]
      when parent then parent.dialect
      when is_a?(Table) || is_a?(TableGroup)
        # No dialect anywhere in the hierarchy: create a default one.
        # Side effect: cached via the dialect= writer on the root only.
        d = Dialect.new({}, @options.merge(parent: self, context: nil))
        self.dialect = d unless self.parent
        d
      else
        raise Error, "Can't access dialect from #{self.class} without a parent"
      end
    end
417
+
418
    # Set new dialect
    # @param [Dialect, Hash, nil] value
    # @return [Dialect]
    def dialect=(value)
      # Clear cached dialect information from children, since they may
      # have memoized a dialect inherited from this node.
      object.values.each do |v|
        case v
        when Metadata then v.object.delete(:dialect)
        when Array then v.each {|vv| vv.object.delete(:dialect) if vv.is_a?(Metadata)}
        end
      end

      if value.is_a?(Hash)
        @dialect = object[:dialect] = Dialect.new(value)
      elsif value
        # Remember invalid dialect for validation purposes
        object[:dialect] = value
      else
        # nil/false clears the dialect entirely.
        object.delete(:dialect)
        @dialect = nil
      end
    end
439
+
440
+ # Type of this Metadata
441
+ # @return [:TableGroup, :Table, :Transformation, :Schema, :Column]
442
+ def type; self.class.name.split('::').last.to_sym; end
443
+
444
+ # Base URL of metadata
445
+ # @return [RDF::URI]
446
+ def base; @options[:base]; end
447
+
448
+ ##
449
+ # Do we have valid metadata?
450
+ def valid?
451
+ validate!
452
+ true
453
+ rescue
454
+ false
455
+ end
456
+
457
+ ##
458
+ # Validation errors
459
+ # @return [Array<String>]
460
+ def errors
461
+ validate! && []
462
+ rescue Error => e
463
+ e.message.split("\n")
464
+ end
465
+
466
+ ##
467
+ # Validate metadata, raising an error containing all errors detected during validation
468
+ # @raise [Error] Raise error if metadata has any unexpected properties
469
+ # @return [self]
470
+ def validate!
471
+ expected_props, required_props = @properties.keys, @required
472
+ errors = []
473
+
474
+ unless is_a?(Dialect) || is_a?(Transformation)
475
+ expected_props = expected_props + INHERITED_PROPERTIES.keys
476
+ end
477
+
478
+ # It has only expected properties (exclude metadata)
479
+ check_keys = object.keys - [:"@id", :"@context"]
480
+ check_keys = check_keys.reject {|k| k.to_s.include?(':')} unless is_a?(Dialect)
481
+ errors << "#{type} has unexpected keys: #{(check_keys - expected_props).map(&:to_s)}" unless check_keys.all? {|k| expected_props.include?(k)}
482
+
483
+ # It has required properties
484
+ errors << "#{type} missing required keys: #{(required_props & check_keys).map(&:to_s)}" unless (required_props & check_keys) == required_props
485
+
486
+ # Every property is valid
487
+ object.keys.each do |key|
488
+ value = object[key]
489
+ case key
490
+ when :aboutUrl, :datatype, :default, :lang, :null, :ordered, :propertyUrl, :separator, :textDirection, :valueUrl
491
+ valid_inherited_property?(key, value) {|m| errors << m}
492
+ when :columns
493
+ if value.is_a?(Array) && value.all? {|v| v.is_a?(Column)}
494
+ value.each do |v|
495
+ begin
496
+ v.validate!
497
+ rescue Error => e
498
+ errors << e.message
499
+ end
500
+ end
501
+ column_names = value.map(&:name)
502
+ errors << "#{type} has invalid property '#{key}': must have unique names: #{column_names.inspect}" unless column_names.uniq == column_names
503
+ else
504
+ errors << "#{type} has invalid property '#{key}': expected array of Columns"
505
+ end
506
+ when :commentPrefix, :delimiter, :quoteChar
507
+ unless value.is_a?(String) && value.length == 1
508
+ errors << "#{type} has invalid property '#{key}': #{value.inspect}, expected a single character string"
509
+ end
510
+ when :format, :lineTerminator, :uriTemplate
511
+ unless value.is_a?(String)
512
+ errors << "#{type} has invalid property '#{key}': #{value.inspect}, expected a string"
513
+ end
514
+ when :dialect
515
+ unless value.is_a?(Dialect)
516
+ errors << "#{type} has invalid property '#{key}': expected a Dialect Description"
517
+ end
518
+ begin
519
+ value.validate! if value
520
+ rescue Error => e
521
+ errors << e.message
522
+ end
523
+ when :doubleQuote, :header, :required, :skipInitialSpace, :skipBlankRows, :suppressOutput, :virtual
524
+ unless value.is_a?(TrueClass) || value.is_a?(FalseClass)
525
+ errors << "#{type} has invalid property '#{key}': #{value}, expected boolean true or false"
526
+ end
527
+ when :encoding
528
+ unless (Encoding.find(value) rescue false)
529
+ errors << "#{type} has invalid property '#{key}': #{value.inspect}, expected a valid encoding"
530
+ end
531
+ when :foreignKeys
532
+ # An array of foreign key definitions that define how the values from specified columns within this table link to rows within this table or other tables. A foreign key definition is a JSON object with the properties:
533
+ value.is_a?(Array) && value.each do |fk|
534
+ if fk.is_a?(Hash)
535
+ columnReference, reference = fk['columnReference'], fk['reference']
536
+ errors << "#{type} has invalid property '#{key}': missing columnReference and reference" unless columnReference && reference
537
+ errors << "#{type} has invalid property '#{key}': has extra entries #{fk.keys.inspect}" unless fk.keys.length == 2
538
+
539
+ # Verify that columns exist in this schema
540
+ Array(columnReference).each do |k|
541
+ errors << "#{type} has invalid property '#{key}': columnReference not found #{k}" unless self.columns.any? {|c| c.name == k}
542
+ end
543
+
544
+ if reference.is_a?(Hash)
545
+ ref_cols = reference['columnReference']
546
+ schema = if reference.has_key?('resource')
547
+ if reference.has_key?('schemaReference')
548
+ errors << "#{type} has invalid property '#{key}': reference has a schemaReference: #{reference.inspect}"
549
+ end
550
+ # resource is the URL of a Table in the TableGroup
551
+ ref = base.join(reference['resource']).to_s
552
+ table = root.is_a?(TableGroup) && root.resources.detect {|t| t.url == ref}
553
+ errors << "#{type} has invalid property '#{key}': table referenced by #{ref} not found" unless table
554
+ table.tableSchema if table
555
+ elsif reference.has_key?('schemaReference')
556
+ # resource is the @id of a Schema in the TableGroup
557
+ ref = base.join(reference['schemaReference']).to_s
558
+ tables = root.is_a?(TableGroup) ? root.resources.select {|t| t.tableSchema[:@id] == ref} : []
559
+ case tables.length
560
+ when 0
561
+ errors << "#{type} has invalid property '#{key}': schema referenced by #{ref} not found"
562
+ nil
563
+ when 1
564
+ tables.first.tableSchema
565
+ else
566
+ errors << "#{type} has invalid property '#{key}': multiple schemas found from #{ref}"
567
+ nil
568
+ end
569
+ end
570
+
571
+ if schema
572
+ # ref_cols must exist in schema
573
+ Array(ref_cols).each do |k|
574
+ errors << "#{type} has invalid property '#{key}': column reference not found #{k}" unless schema.columns.any? {|c| c.name == k}
575
+ end
576
+ end
577
+ else
578
+ errors << "#{type} has invalid property '#{key}': reference must be an object #{reference.inspect}"
579
+ end
580
+ else
581
+ errors << "#{type} has invalid property '#{key}': reference must be an object: #{reference.inspect}"
582
+ end
583
+ end
584
+ when :headerColumnCount, :headerRowCount, :skipColumns, :skipRows
585
+ unless value.is_a?(Numeric) && value.integer? && value > 0
586
+ errors << "#{type} has invalid property '#{key}': #{value.inspect} must be a positive integer"
587
+ end
588
+ when :length, :minLength, :maxLength
589
+ unless value.is_a?(Numeric) && value.integer? && value > 0
590
+ errors << "#{type} has invalid property '#{key}': #{value.inspect}, expected a positive integer"
591
+ end
592
+ unless key == :length || value != object[:length]
593
+ # Applications must raise an error if length, maxLength or minLength are specified and the cell value is not a list (ie separator is not specified), a string or one of its subtypes, or a binary value.
594
+ errors << "#{type} has invalid property '#{key}': Use of both length and #{key} requires they be equal"
595
+ end
596
+ when :minimum, :maximum, :minInclusive, :maxInclusive, :minExclusive, :maxExclusive
597
+ unless value.is_a?(Numeric) ||
598
+ RDF::Literal::Date.new(value.to_s).valid? ||
599
+ RDF::Literal::Time.new(value.to_s).valid? ||
600
+ RDF::Literal::DateTime.new(value.to_s).valid?
601
+ errors << "#{type} has invalid property '#{key}': #{value}, expected numeric or valid date/time"
602
+ end
603
+ when :name
604
+ unless value.is_a?(String) && name.match(NAME_SYNTAX)
605
+ errors << "#{type} has invalid property '#{key}': #{value}, expected proper name format"
606
+ end
607
+ when :notes
608
+ unless value.is_a?(Hash) || value.is_a?(Array)
609
+ errors << "#{type} has invalid property '#{key}': #{value}, Object or Array"
610
+ end
611
+ begin
612
+ normalize_jsonld(key, value)
613
+ rescue Error => e
614
+ errors << "#{type} has invalid content '#{key}': #{e.message}"
615
+ end
616
+ when :primaryKey
617
+ # A column reference property that holds either a single reference to a column description object or an array of references.
618
+ Array(value).each do |k|
619
+ errors << "#{type} has invalid property '#{key}': column reference not found #{k}" unless self.columns.any? {|c| c.name == k}
620
+ end
621
+ when :resources
622
+ if value.is_a?(Array) && value.all? {|v| v.is_a?(Table)}
623
+ value.each do |t|
624
+ begin
625
+ t.validate!
626
+ rescue Error => e
627
+ errors << e.message
628
+ end
629
+ end
630
+ else
631
+ errors << "#{type} has invalid property '#{key}': expected array of Tables"
632
+ end
633
+ when :scriptFormat, :targetFormat
634
+ unless RDF::URI(value).valid?
635
+ errors << "#{type} has invalid property '#{key}': #{value.inspect}, expected valid absolute URL"
636
+ end
637
+ when :source
638
+ unless %w(json rdf).include?(value) || value.nil?
639
+ errors << "#{type} has invalid property '#{key}': #{value.inspect}, expected json or rdf"
640
+ end
641
+ when :tableDirection
642
+ unless %w(rtl ltr default).include?(value)
643
+ errors << "#{type} has invalid property '#{key}': #{value.inspect}, expected rtl, ltr, or default"
644
+ end
645
+ when :tableSchema
646
+ if value.is_a?(Schema)
647
+ begin
648
+ value.validate!
649
+ rescue Error => e
650
+ errors << e.message
651
+ end
652
+ else
653
+ errors << "#{type} has invalid property '#{key}': expected Schema"
654
+ end
655
+ when :transformations
656
+ if value.is_a?(Array) && value.all? {|v| v.is_a?(Transformation)}
657
+ value.each do |t|
658
+ begin
659
+ t.validate!
660
+ rescue Error => e
661
+ errors << e.message
662
+ end
663
+ end
664
+ else
665
+ errors << "#{type} has invalid property '#{key}': expected array of Transformations"
666
+ end
667
+ when :title
668
+ valid_natural_language_property?(:title, value) {|m| errors << m}
669
+ when :trim
670
+ unless %w(true false 1 0 start end).include?(value.to_s.downcase)
671
+ errors << "#{type} has invalid property '#{key}': #{value.inspect}, expected true, false, 1, 0, start or end"
672
+ end
673
+ when :url
674
+ unless @url.valid?
675
+ errors << "#{type} has invalid property '#{key}': #{value.inspect}, expected valid absolute URL"
676
+ end
677
+ when :@id, :@context
678
+ # Skip these
679
+ when :@type
680
+ unless value.to_sym == type
681
+ errors << "#{type} has invalid property '#{key}': #{value.inspect}, expected #{type}"
682
+ end
683
+ when ->(k) {key.to_s.include?(':')}
684
+ begin
685
+ normalize_jsonld(key, value)
686
+ rescue Error => e
687
+ errors << "#{type} has invalid content '#{key}': #{e.message}"
688
+ end
689
+ else
690
+ errors << "#{type} has invalid property '#{key}': unsupported property"
691
+ end
692
+ end
693
+
694
+ raise Error, errors.join("\n") unless errors.empty?
695
+ self
696
+ end
697
+
698
+ ##
699
+ # Determine if a natural language property is valid
700
+ # @param [String, Array<String>, Hash{String => String}] value
701
+ # @yield message error message
702
+ # @return [Boolean]
703
+ def valid_natural_language_property?(key, value)
704
+ unless value.is_a?(Hash) && value.all? {|k, v| Array(v).all? {|vv| vv.is_a?(String)}}
705
+ yield "#{type} has invalid property '#{key}': #{value.inspect}, expected a valid natural language property" if block_given?
706
+ false
707
+ end
708
+ end
709
+
710
+ ##
711
+ # Determine if an inherited property is valid
712
+ # @param [String, Array<String>, Hash{String => String}] value
713
+ # @yield message error message
714
+ # @return [Boolean]
715
+ def valid_inherited_property?(key, value)
716
+ pv = parent.send(key) if parent
717
+ error = case key
718
+ when :aboutUrl, :default, :propertyUrl, :valueUrl
719
+ "string" unless value.is_a?(String)
720
+ when :datatype
721
+ # Normalization usually redundant
722
+ dt = normalize_datatype(value)
723
+ # FIXME: support arrays of datatypes?
724
+ "valid datatype" unless DATATYPES.keys.map(&:to_s).include?(dt[:base]) || RDF::URI(dt[:base]).absolute?
725
+ when :lang
726
+ "valid BCP47 language tag" unless BCP47::Language.identify(value.to_s)
727
+ when :null
728
+ # To be valid, it must be a string or array, and must be compatible with any inherited value through being a subset
729
+ "string or array of strings" unless !value.is_a?(Hash) && Array(value).all? {|v| v.is_a?(String)}
730
+ when :ordered
731
+ "boolean" unless value.is_a?(TrueClass) || value.is_a?(FalseClass)
732
+ when :separator
733
+ "single character" unless value.nil? || value.is_a?(String) && value.length == 1
734
+ when :textDirection
735
+ # A value for this property is compatible with an inherited value only if they are identical.
736
+ "rtl or ltr" unless %(rtl ltr).include?(value)
737
+ end ||
738
+
739
+ case key
740
+ # Compatibility
741
+ when :aboutUrl, :propertyUrl, :valueUrl
742
+ # No restrictions
743
+ when :default, :ordered, :separator, :textDirection
744
+ "same as that defined on parent" if pv && pv != value
745
+ when :datatype
746
+ if pv
747
+ # Normalization usually redundant
748
+ dt = normalize_datatype(value)
749
+ pvdt = normalize_datatype(pv)
750
+ vl = RDF::Literal.new("", datatype: DATATYPES[dt[:base].to_sym])
751
+ pvvl = RDF::Literal.new("", datatype: DATATYPES[pvdt[:base].to_sym])
752
+ # must be a subclass of some type defined on parent
753
+ "compatible datatype of that defined on parent" unless vl.is_a?(pvvl.class)
754
+ end
755
+ when :lang
756
+ "lang expected to restrict #{pv}" if pv && !value.start_with?(pv)
757
+ when :null
758
+ "subset of that defined on parent" if pv && (Array(value) & Array(pv)) != Array(value)
759
+ end
760
+
761
+ if error
762
+ yield "#{type} has invalid property '#{key}' ('#{value}'): expected #{error}"
763
+ false
764
+ else
765
+ true
766
+ end
767
+ end
768
+
769
+ ##
770
+ # Yield each data row from the input file
771
+ #
772
+ # @param [:read] input
773
+ # @yield [Row]
774
+ def each_row(input)
775
+ csv = ::CSV.new(input, csv_options)
776
+ # Skip skipRows and headerRowCount
777
+ number, skipped = 0, (dialect.skipRows.to_i + dialect.headerRowCount)
778
+ (1..skipped).each {csv.shift}
779
+ csv.each do |data|
780
+ # Check for embedded comments
781
+ if dialect.commentPrefix && data.first.to_s.start_with?(dialect.commentPrefix)
782
+ v = data.join(' ')[1..-1].strip
783
+ unless v.empty?
784
+ (self["rdfs:comment"] ||= []) << v
785
+ yield RDF::Statement.new(nil, RDF::RDFS.comment, RDF::Literal(v))
786
+ end
787
+ skipped += 1
788
+ next
789
+ elsif dialect.skipBlankRows && data.join("").strip.empty?
790
+ skipped += 1
791
+ next
792
+ end
793
+ number += 1
794
+ yield(Row.new(data, self, number, number + skipped))
795
+ end
796
+ end
797
+
798
+ ##
799
+ # Return JSON-friendly or yield RDF for common properties
800
+ #
801
+ # @overload common_properties(subject, property, value, &block)
802
+ # Yield RDF statements
803
+ # @param [RDF::Resource] subject
804
+ # @param [String] property
805
+ # @param [String, Hash{String => Object}, Array<String, Hash{String => Object}>] value
806
+ # @yield property, value
807
+ # @yieldparam [String] property as a PName or URL
808
+ # @yieldparam [RDF::Statement] statement
809
+ #
810
+ # @overload common_properties(subject, property, value)
811
+ # Return value with expanded values and node references flattened
812
+ # @return [String, Hash{String => Object}, Array<String, Hash{String => Object}>] simply extracted from metadata
813
+ def common_properties(subject, property, value, &block)
814
+ if block_given?
815
+ property = context.expand_iri(property.to_s, vocab: true) unless property.is_a?(RDF::URI)
816
+ case value
817
+ when Array
818
+ value.each {|v| common_properties(subject, property, v, &block)}
819
+ when Hash
820
+ if value['@value']
821
+ dt = RDF::URI(context.expand_iri(value['@type'], vocab: true)) if value['@type']
822
+ lit = RDF::Literal(value['@value'], language: value['@language'], datatype: dt)
823
+ block.call(RDF::Statement.new(subject, property, lit))
824
+ else
825
+ # value MUST be a node object, establish a new subject from `@id`
826
+ s2 = value.has_key?('@id') ? context.expand_iri(value['@id']) : RDF::Node.new
827
+
828
+ # Generate a triple
829
+ block.call(RDF::Statement.new(subject, property, s2))
830
+
831
+ # Generate types
832
+ Array(value['@type']).each do |t|
833
+ block.call(RDF::Statement.new(s2, RDF.type, context.expand_iri(t, vocab: true)))
834
+ end
835
+
836
+ # Generate triples for all other properties
837
+ value.each do |prop, val|
838
+ next if prop.to_s.start_with?('@')
839
+ common_properties(s2, prop, val, &block)
840
+ end
841
+ end
842
+ else
843
+ # Value is a primitive JSON value
844
+ lit = RDF::Literal(value)
845
+ block.call(RDF::Statement.new(subject, property, RDF::Literal(value)))
846
+ end
847
+ else
848
+ case value
849
+ when Array
850
+ value.map {|v| common_properties(subject, property, v)}
851
+ when Hash
852
+ if value['@value']
853
+ value['@value']
854
+ elsif value.keys == %w(@id) && value['@id']
855
+ value['@id']
856
+ else
857
+ nv = {}
858
+ value.each do |k, v|
859
+ case k.to_s
860
+ when '@id' then nv[k.to_s] = context.expand_iri(v['@id']).to_s
861
+ when '@type' then nv[k.to_s] = v
862
+ else nv[k.to_s] = common_properties(nil, k, v)
863
+ end
864
+ end
865
+ nv
866
+ end
867
+ else
868
+ value
869
+ end
870
+ end
871
+ end
872
+
873
+ # Does the Metadata have any common properties?
874
+ # @return [Boolean]
875
+ def has_annotations?
876
+ object.keys.any? {|k| k.to_s.include?(':')}
877
+ end
878
+
879
+ # Merge metadata into this a copy of this metadata
880
+ # @param [Array<Metadata>] metadata
881
+ # @return [Metadata]
882
+ def merge(*metadata)
883
+ return self if metadata.empty?
884
+ # If the top-level object of any of the metadata files are table descriptions, these are treated as if they were table group descriptions containing a single table description (ie having a single resource property whose value is the same as the original table description).
885
+ this = case self
886
+ when TableGroup then self.dup
887
+ when Table
888
+ if self.is_a?(Table) && self.parent
889
+ self.parent
890
+ else
891
+ content = {"@type" => "TableGroup", "resources" => [self]}
892
+ content['@context'] = object.delete(:@context) if object[:@context]
893
+ ctx = @context
894
+ self.remove_instance_variable(:@context) if self.instance_variables.include?(:@context)
895
+ tg = TableGroup.new(content, filenames: @filenames, base: base)
896
+ @parent = tg # Link from parent
897
+ tg
898
+ end
899
+ else self.dup
900
+ end
901
+
902
+ # Merge all passed metadata into this
903
+ merged = metadata.reduce(this) do |memo, md|
904
+ md = case md
905
+ when TableGroup then md
906
+ when Table
907
+ if md.parent
908
+ md.parent
909
+ else
910
+ content = {"@type" => "TableGroup", "resources" => [md]}
911
+ ctx = md.context
912
+ content['@context'] = md.object.delete(:@context) if md.object[:@context]
913
+ md.remove_instance_variable(:@context) if md.instance_variables.include?(:@context)
914
+ tg = TableGroup.new(content, filenames: md.filenames, base: md.base)
915
+ md.instance_variable_set(:@parent, tg) # Link from parent
916
+ tg
917
+ end
918
+ else
919
+ md
920
+ end
921
+
922
+ raise "Can't merge #{memo.class} with #{md.class}" unless memo.class == md.class
923
+
924
+ memo.merge!(md)
925
+ end
926
+
927
+ # Set @context of merged
928
+ merged[:@context] = 'http://www.w3.org/ns/csvw'
929
+ merged
930
+ end
931
+
932
+ # Merge metadata into self
933
+ def merge!(metadata)
934
+ raise "Merging non-equivalent metadata types: #{self.class} vs #{metadata.class}" unless self.class == metadata.class
935
+
936
+ depth do
937
+ # Merge filenames
938
+ if @filenames || metadata.filenames
939
+ @filenames = (Array(@filenames) | Array(metadata.filenames)).uniq
940
+ end
941
+
942
+ # Normalize A (this) and B (metadata) values into normal form
943
+ self.normalize!
944
+ metadata = metadata.dup.normalize!
945
+
946
+ @dialect = nil # So that it is re-built when needed
947
+ # Merge each property from metadata into self
948
+ metadata.each do |key, value|
949
+ case @properties[key]
950
+ when :array
951
+ # If the property is an array property, the way in which values are merged depends on the property; see the relevant property for this definition.
952
+ object[key] = case object[key]
953
+ when nil then []
954
+ when Hash then [object[key]] # Shouldn't happen if well formed
955
+ else object[key]
956
+ end
957
+
958
+ value = [value] if value.is_a?(Hash)
959
+ case key
960
+ when :notes
961
+ # If the property is notes, the result is an array containing values from A followed by values from B.
962
+ a = object[key].is_a?(Array) ? object[key] : [object[key]].compact
963
+ b = value.is_a?(Array) ? value : [value]
964
+ object[key] = a + b
965
+ when :resources
966
+ # When an array of table descriptions B is imported into an original array of table descriptions A, each table description within B is combined into the original array A by:
967
+ value.each do |tb|
968
+ if ta = object[key].detect {|e| e.url == tb.url}
969
+ # if there is a table description with the same url in A, the table description from B is imported into the matching table description in A
970
+ debug("merge!: resources") {"TA: #{ta.inspect}, TB: #{tb.inspect}"}
971
+ ta.merge!(tb)
972
+ else
973
+ # otherwise, the table description from B is appended to the array of table descriptions A
974
+ tb = tb.dup
975
+ tb.instance_variable_set(:@parent, self)
976
+ debug("merge!: resources") {"add TB: #{tb.inspect}"}
977
+ object[key] << tb
978
+ end
979
+ end
980
+ when :transformations
981
+ # SPEC CONFUSION: differing transformations with same @id?
982
+ # When an array of template specifications B is imported into an original array of template specifications A, each template specification within B is combined into the original array A by:
983
+ value.each do |t|
984
+ if ta = object[key].detect {|e| e.targetFormat == t.targetFormat && e.scriptFormat == t.scriptFormat}
985
+ # if there is a template specification with the same targetFormat and scriptFormat in A, the template specification from B is imported into the matching template specification in A
986
+ ta.merge!(t)
987
+ else
988
+ # otherwise, the template specification from B is appended to the array of template specifications A
989
+ t = t.dup
990
+ t.instance_variable_set(:@parent, self) if self
991
+ object[key] << t
992
+ end
993
+ end
994
+ when :columns
995
+ # When an array of column descriptions B is imported into an original array of column descriptions A, each column description within B is combined into the original array A by:
996
+ Array(value).each_with_index do |cb, index|
997
+ ca = object[key][index] || {}
998
+ va = ([ca[:name]] + (ca[:title] || {}).values.flatten).compact.map(&:downcase)
999
+ vb = ([cb[:name]] + (cb[:title] || {}).values.flatten).compact.map(&:downcase)
1000
+ if !(va & vb).empty?
1001
+ debug("merge!: columns") {"index: #{index}, va: #{va}, vb: #{vb}"}
1002
+ # If there's a non-empty case-insensitive intersection between the name and title values for the column description at the same index within A and B, the column description from B is imported into the matching column description in A
1003
+ ca.merge!(cb)
1004
+ elsif ca.nil? && cb.virtual
1005
+ debug("merge!: columns") {"index: #{index}, virtual"}
1006
+ # otherwise, if at a given index there is no column description within A, but there is a column description within B.
1007
+ cb = cb.dup
1008
+ cb.instance_variable_set(:@parent, self) if self
1009
+ object[key][index] = cb
1010
+ else
1011
+ debug("merge!: columns") {"index: #{index}, ignore"}
1012
+ raise Error, "Columns at same index don't match: #{ca.to_json} vs. #{cb.to_json}"
1013
+ end
1014
+ end
1015
+ # The number of non-virtual columns in A and B MUST be the same
1016
+ nA = object[key].reject(&:virtual).length
1017
+ nB = Array(value).reject(&:virtual).length
1018
+ raise Error, "Columns must have the same number of non-virtual columns" unless nA == nB || nB == 0
1019
+ when :foreignKeys
1020
+ # When an array of foreign key definitions B is imported into an original array of foreign key definitions A, each foreign key definition within B which does not appear within A is appended to the original array A.
1021
+ # SPEC CONFUSION: If definitions vary only a little, they should probably be merged (e.g. common properties).
1022
+ object[key] = object[key] + (metadata[key] - object[key])
1023
+ end
1024
+ when :object
1025
+ case key
1026
+ when :notes
1027
+ # If the property accepts arrays, the result is an array of objects or strings: those from A followed by those from B that were not already a value in A.
1028
+ a = object[key] || []
1029
+ object[key] = (a + value).uniq
1030
+ else
1031
+ # if the property only accepts single objects
1032
+ if object[key].is_a?(String) || value.is_a?(String)
1033
+ # if the value of the property in A is a string or the value from B is a string then the value from A overrides that from B
1034
+ object[key] ||= value
1035
+ elsif object[key].is_a?(Metadata)
1036
+ # otherwise (if both values as objects) the objects are merged as described here
1037
+ object[key].merge!(value)
1038
+ elsif object[key].is_a?(Hash)
1039
+ # otherwise (if both values as objects) the objects are merged as described here
1040
+ object[key].merge!(value)
1041
+ else
1042
+ value = value.dup
1043
+ value.instance_variable_set(:@parent, self) if self
1044
+ object[key] = value
1045
+ end
1046
+ end
1047
+ when :natural_language
1048
+ # If the property is a natural language property, the result is an object whose properties are language codes and where the values of those properties are arrays. The suitable language code for the values is either explicit within the existing value or determined through the default language in the metadata document; if it can't be determined the language code und should be used. The arrays should provide the values from A followed by those from B that were not already a value in A.
1049
+ a = object[key] || {}
1050
+ b = value
1051
+ debug("merge!: natural_language") {
1052
+ "A: #{a.inspect}, B: #{b.inspect}"
1053
+ }
1054
+ b.each do |k, v|
1055
+ a[k] = Array(a[k]) + (Array(b[k]) - Array(a[k]))
1056
+ end
1057
+ # eliminate titles with no language where the same string exists with a language
1058
+ if a.has_key?("und")
1059
+ a["und"] = a["und"].reject do |v|
1060
+ a.any? {|lang, values| lang != 'und' && values.include?(v)}
1061
+ end
1062
+ a.delete("und") if a["und"].empty?
1063
+ end
1064
+ object[key] = a
1065
+ when ->(k) {key == :@id}
1066
+ object[key] ||= value
1067
+ @id ||= metadata.id
1068
+ else
1069
+ # Otherwise, the value from A overrides that from B
1070
+ object[key] ||= value
1071
+ end
1072
+ end
1073
+ end
1074
+
1075
+ debug("merge!") {self.inspect}
1076
+ self
1077
+ end
1078
+
1079
+ def inspect
1080
+ self.class.name + object.inspect
1081
+ end
1082
+
1083
+ # Proxy to @object
1084
+ def [](key); object[key]; end
1085
+ def []=(key, value); object[key] = value; end
1086
+ def each(&block); object.each(&block); end
1087
+ def ==(other)
1088
+ object == (other.is_a?(Hash) ? other : other.object)
1089
+ end
1090
+ def to_json(args=nil); object.to_json(args); end
1091
+
1092
+ ##
1093
+ # Normalize object
1094
+ # @raise [Error]
1095
+ # @return [self]
1096
+ def normalize!
1097
+ self.each do |key, value|
1098
+ self[key] = case @properties[key] || INHERITED_PROPERTIES[key]
1099
+ when ->(k) {key.to_s.include?(':') || key == :notes}
1100
+ normalize_jsonld(key, value)
1101
+ when ->(k) {key.to_s == '@context'}
1102
+ "http://www.w3.org/ns/csvw"
1103
+ when :link
1104
+ base.join(value).to_s
1105
+ when :array
1106
+ value = [value] unless value.is_a?(Array)
1107
+ value.map do |v|
1108
+ if v.is_a?(Metadata)
1109
+ v.normalize!
1110
+ elsif v.is_a?(Hash) && (ref = v["reference"]).is_a?(Hash)
1111
+ # SPEC SUGGESTION: special case for foreignKeys
1112
+ ref["resource"] = base.join(ref["resource"]).to_s if ref["resource"]
1113
+ ref["schemaReference"] = base.join(ref["schemaReference"]).to_s if ref["schemaReference"]
1114
+ v
1115
+ else
1116
+ v
1117
+ end
1118
+ end
1119
+ when :object
1120
+ case value
1121
+ when Metadata then value.normalize!
1122
+ when String
1123
+ # Load referenced JSON document
1124
+ # (This is done when objects are loaded in this implementation)
1125
+ raise "unexpected String value of property '#{key}': #{value}"
1126
+ else value
1127
+ end
1128
+ when :natural_language
1129
+ value.is_a?(Hash) ? value : {(context.default_language || 'und') => Array(value)}
1130
+ when :atomic
1131
+ case key
1132
+ when :datatype then normalize_datatype(value)
1133
+ else value
1134
+ end
1135
+ else
1136
+ value
1137
+ end
1138
+ end
1139
+ self
1140
+ end
1141
+
1142
+ ##
1143
+ # Normalize datatype to Object/Hash representation
1144
+ # @param [String, Hash{Symbol => String}] value
1145
+ # @return [Hash{Symbol => String}]
1146
+ def normalize_datatype(value)
1147
+ # Normalize datatype to array of object form
1148
+ value = {base: value} unless value.is_a?(Hash)
1149
+ # Create a new representation using symbols and transformed values
1150
+ nv = {}
1151
+ value.each do |kk, vv|
1152
+ case kk.to_sym
1153
+ when :base, :decimalChar, :format, :groupChar, :pattern then nv[kk.to_sym] = vv
1154
+ when :length, :minLength, :maxLength, :minimum, :maximum,
1155
+ :minInclusive, :maxInclusive, :minExclusive, :maxExclusive
1156
+ nv[kk.to_sym] = vv.to_i
1157
+ end
1158
+ end
1159
+ nv[:base] ||= 'string'
1160
+ nv
1161
+ end
1162
+
1163
+ ##
1164
+ # Normalize JSON-LD
1165
+ #
1166
+ # Also, raise error if invalid JSON-LD dialect is detected
1167
+ #
1168
+ # @param [Symbol, String] property
1169
+ # @param [String, Hash{String => Object}, Array<String, Hash{String => Object}>] value
1170
+ # @return [String, Hash{String => Object}, Array<String, Hash{String => Object}>]
1171
+ def normalize_jsonld(property, value)
1172
+ case value
1173
+ when Array
1174
+ value.map {|v| normalize_jsonld(property, v)}
1175
+ when String
1176
+ ev = {'@value' => value}
1177
+ ev['@language'] = context.default_language if context.default_language
1178
+ ev
1179
+ when Hash
1180
+ if value['@value']
1181
+ if !(value.keys.sort - %w(@value @type @language)).empty?
1182
+ raise Error, "Value object may not contain keys other than @value, @type, or @language: #{value.to_json}"
1183
+ elsif (value.keys.sort & %w(@language @type)) == %w(@language @type)
1184
+ raise Error, "Value object may not contain both @type and @language: #{value.to_json}"
1185
+ elsif value['@language'] && !BCP47::Language.identify(value['@language'])
1186
+ raise Error, "Value object with @language must use valid language: #{value.to_json}"
1187
+ elsif value['@type'] && !context.expand_iri(value['@type'], vocab: true).absolute?
1188
+ raise Error, "Value object with @type must defined type: #{value.to_json}"
1189
+ end
1190
+ value
1191
+ else
1192
+ nv = {}
1193
+ value.each do |k, v|
1194
+ case k
1195
+ when "@id"
1196
+ nv[k] = context.expand_iri(v, documentRelative: true).to_s
1197
+ raise Error, "Invalid use of explicit BNode on @id" if nv[k].start_with?('_:')
1198
+ when "@type"
1199
+ Array(v).each do |vv|
1200
+ # Validate that all type values transform to absolute IRIs
1201
+ resource = context.expand_iri(vv, vocab: true)
1202
+ raise Error, "Invalid type #{vv} in JSON-LD context" unless resource.uri? && resource.absolute?
1203
+ end
1204
+ nv[k] = v
1205
+ when /^(@|_:)/
1206
+ raise Error, "Invalid use of #{k} in JSON-LD content"
1207
+ else
1208
+ nv[k] = normalize_jsonld(k, v)
1209
+ end
1210
+ end
1211
+ nv
1212
+ end
1213
+ else
1214
+ value
1215
+ end
1216
+ end
1217
+ protected
1218
+
1219
+ # When setting a natural language property, always put in language-map form
1220
+ # @param [Symbol] prop
1221
+ # @param [Hash{String => String, Array<String>}, Array<String>, String] value
1222
+ # @return [Hash{String => Array<String>}]
1223
+ def set_nl(prop, value)
1224
+ object[prop] = case value
1225
+ when String then {(context.default_language || 'und') => [value]}
1226
+ when Array then {(context.default_language || 'und') => value}
1227
+ else value
1228
+ end
1229
+ end
1230
+
1231
+ def inherited_property_value(method)
1232
+ # Inherited properties
1233
+ object.fetch(method.to_sym) do
1234
+ parent.send(method) if parent
1235
+ end
1236
+ end
1237
+
1238
+ ##
1239
+ # Get the root metadata object
1240
+ # @return [TableGroup, Table]
1241
+ def root
1242
+ self.parent ? self.parent.root : self
1243
+ end
1244
+ private
1245
+ # Options passed to CSV.new based on dialect
1246
+ def csv_options
1247
+ {
1248
+ col_sep: (is_a?(Dialect) ? self : dialect).delimiter,
1249
+ row_sep: (is_a?(Dialect) ? self : dialect).lineTerminator,
1250
+ quote_char: (is_a?(Dialect) ? self : dialect).quoteChar,
1251
+ encoding: (is_a?(Dialect) ? self : dialect).encoding
1252
+ }
1253
+ end
1254
+
1255
+ class DebugContext
1256
+ include Utils
1257
+ def initialize(*args, &block)
1258
+ @options = {}
1259
+ debug(*args, &block)
1260
+ end
1261
+ end
1262
+ def self.debug(*args, &block)
1263
+ DebugContext.new(*args, &block)
1264
+ end
1265
+ end
1266
+
1267
+ class TableGroup < Metadata
1268
+ PROPERTIES = {
1269
+ :@id => :link,
1270
+ :@type => :atomic,
1271
+ notes: :array,
1272
+ resources: :array,
1273
+ tableSchema: :object,
1274
+ tableDirection: :atomic,
1275
+ dialect: :object,
1276
+ transformations: :array,
1277
+ }.freeze
1278
+ REQUIRED = [].freeze
1279
+
1280
+ # Setters
1281
+ PROPERTIES.each do |a, type|
1282
+ next if a == :dialect
1283
+ define_method("#{a}=".to_sym) do |value|
1284
+ case type
1285
+ when :natural_language
1286
+ set_nl(a, value)
1287
+ else
1288
+ object[a] = value.to_s =~ /^\d+/ ? value.to_i : value
1289
+ end
1290
+ end
1291
+ end
1292
+
1293
+ # Does the Metadata or any descendant have any common properties
1294
+ # @return [Boolean]
1295
+ def has_annotations?
1296
+ super || resources.any? {|t| t.has_annotations? }
1297
+ end
1298
+
1299
+ # Logic for accessing elements as accessors
1300
+ def method_missing(method, *args)
1301
+ if INHERITED_PROPERTIES.has_key?(method.to_sym)
1302
+ inherited_property_value(method.to_sym)
1303
+ else
1304
+ PROPERTIES.has_key?(method.to_sym) ? object[method.to_sym] : super
1305
+ end
1306
+ end
1307
+
1308
+ ##
1309
+ # Iterate over all resources
1310
+ # @yield [Table]
1311
+ def each_resource
1312
+ resources.map(&:url).each do |url|
1313
+ yield for_table(url)
1314
+ end
1315
+ end
1316
+
1317
+ ##
1318
+ # Return the metadata for a specific table, re-basing context as necessary
1319
+ #
1320
+ # @param [String] url of the table
1321
+ # @return [Table]
1322
+ def for_table(url)
1323
+ # If there are no resources, assume there's one for this table
1324
+ #self.resources ||= [Table.new(url: url)]
1325
+ if table = Array(resources).detect {|t| t.url == url}
1326
+ # Set document base for this table for resolving URLs
1327
+ table.instance_variable_set(:@context, context.dup)
1328
+ table.context.base = url
1329
+ table
1330
+ end
1331
+ end
1332
+
1333
+ # Return Annotated Table Group representation
1334
+ def to_atd
1335
+ {
1336
+ "@id" => id,
1337
+ "@type" => "AnnotatedTableGroup",
1338
+ "resources" => resources.map(&:to_atd)
1339
+ }
1340
+ end
1341
+ end
1342
+
1343
+ class Table < Metadata
1344
+ PROPERTIES = {
1345
+ :@id => :link,
1346
+ :@type => :atomic,
1347
+ dialect: :object,
1348
+ notes: :array,
1349
+ suppressOutput: :atomic,
1350
+ tableDirection: :atomic,
1351
+ tableSchema: :object,
1352
+ transformations: :array,
1353
+ url: :link,
1354
+ }.freeze
1355
+ REQUIRED = [:url].freeze
1356
+
1357
+ # Setters
1358
+ PROPERTIES.each do |a, type|
1359
+ next if a == :dialect
1360
+ define_method("#{a}=".to_sym) do |value|
1361
+ case type
1362
+ when :natural_language
1363
+ set_nl(a, value)
1364
+ else
1365
+ object[a] = value.to_s =~ /^\d+/ ? value.to_i : value
1366
+ end
1367
+ end
1368
+ end
1369
+
1370
+ # Does the Metadata or any descendant have any common properties
1371
+ # @return [Boolean]
1372
+ def has_annotations?
1373
+ super || tableSchema && tableSchema.has_annotations?
1374
+ end
1375
+
1376
+ # Return Annotated Table representation
1377
+ def to_atd
1378
+ {
1379
+ "@id" => id,
1380
+ "@type" => "AnnotatedTable",
1381
+ "columns" => tableSchema.columns.map(&:to_atd),
1382
+ "rows" => [],
1383
+ "url" => self.url.to_s
1384
+ }
1385
+ end
1386
+
1387
+ # Logic for accessing elements as accessors
1388
+ def method_missing(method, *args)
1389
+ if INHERITED_PROPERTIES.has_key?(method.to_sym)
1390
+ inherited_property_value(method.to_sym)
1391
+ else
1392
+ PROPERTIES.has_key?(method.to_sym) ? object[method.to_sym] : super
1393
+ end
1394
+ end
1395
+ end
1396
+
1397
+ class Transformation < Metadata
1398
+ PROPERTIES = {
1399
+ :@id => :link,
1400
+ :@type => :atomic,
1401
+ source: :atomic,
1402
+ targetFormat: :link,
1403
+ scriptFormat: :link,
1404
+ title: :natural_language,
1405
+ url: :link,
1406
+ }.freeze
1407
+ REQUIRED = %w(url targetFormat scriptFormat).map(&:to_sym).freeze
1408
+
1409
+ # Setters
1410
+ PROPERTIES.each do |a, type|
1411
+ define_method("#{a}=".to_sym) do |value|
1412
+ case type
1413
+ when :natural_language
1414
+ set_nl(a, value)
1415
+ else
1416
+ object[a] = value.to_s =~ /^\d+/ ? value.to_i : value
1417
+ end
1418
+ end
1419
+ end
1420
+
1421
+ # Logic for accessing elements as accessors
1422
+ def method_missing(method, *args)
1423
+ PROPERTIES.has_key?(method.to_sym) ? object[method.to_sym] : super
1424
+ end
1425
+ end
1426
+
1427
+ class Schema < Metadata
1428
+ PROPERTIES = {
1429
+ :@id => :link,
1430
+ :@type => :atomic,
1431
+ columns: :array,
1432
+ foreignKeys: :array,
1433
+ primaryKey: :column_reference,
1434
+ }.freeze
1435
+ REQUIRED = [].freeze
1436
+
1437
+ # Setters
1438
+ PROPERTIES.each do |a, type|
1439
+ define_method("#{a}=".to_sym) do |value|
1440
+ case type
1441
+ when :natural_language
1442
+ set_nl(a, value)
1443
+ else
1444
+ object[a] = value.to_s =~ /^\d+/ ? value.to_i : value
1445
+ end
1446
+ end
1447
+ end
1448
+
1449
+ # Logic for accessing elements as accessors
1450
+ def method_missing(method, *args)
1451
+ if INHERITED_PROPERTIES.has_key?(method.to_sym)
1452
+ inherited_property_value(method.to_sym)
1453
+ else
1454
+ PROPERTIES.has_key?(method.to_sym) ? object[method.to_sym] : super
1455
+ end
1456
+ end
1457
+ end
1458
+
1459
+ class Column < Metadata
1460
+ PROPERTIES = {
1461
+ :@id => :link,
1462
+ :@type => :atomic,
1463
+ name: :atomic,
1464
+ suppressOutput: :atomic,
1465
+ title: :natural_language,
1466
+ required: :atomic,
1467
+ virtual: :atomic,
1468
+ }.freeze
1469
+ REQUIRED = [].freeze
1470
+
1471
+ ##
1472
+ # Table containing this column (if any)
1473
+ # @return [Table]
1474
+ def table; @options[:table]; end
1475
+
1476
+ # Column number set on initialization
1477
+ # @return [Integer] 1-based colnum number
1478
+ def number
1479
+ @options.fetch(:number, 0)
1480
+ end
1481
+
1482
+ # Source Column number set on initialization
1483
+ #
1484
+ # @note this is lazy evaluated to avoid dependencies on setting dialect vs. initializing columns
1485
+ # @return [Integer] 1-based colnum number
1486
+ def sourceNumber
1487
+ skipColumns = table ? (dialect.skipColumns.to_i + dialect.headerColumnCount.to_i) : 0
1488
+ number + skipColumns
1489
+ end
1490
+
1491
+ # Does the Metadata or any descendant have any common properties
1492
+ # @return [Boolean]
1493
+ def has_annotations?
1494
+ super || columns.any? {|c| c.has_annotations? }
1495
+ end
1496
+
1497
+ # Setters
1498
+ PROPERTIES.each do |a, type|
1499
+ define_method("#{a}=".to_sym) do |value|
1500
+ case type
1501
+ when :natural_language
1502
+ set_nl(a, value)
1503
+ else
1504
+ object[a] = value.to_s =~ /^\d+/ ? value.to_i : value
1505
+ end
1506
+ end
1507
+ end
1508
+
1509
+ # Return or create a name for the column from title, if it exists
1510
+ def name
1511
+ object[:name] ||= if title && (ts = title[context.default_language || 'und'])
1512
+ n = Array(ts).first
1513
+ n0 = URI.encode(n[0,1], /[^a-zA-Z0-9]/)
1514
+ n1 = URI.encode(n[1..-1], /[^\w\.]/)
1515
+ "#{n0}#{n1}"
1516
+ end || "_col.#{number}"
1517
+ end
1518
+
1519
+ # Identifier for this Column, as an RFC7111 fragment
1520
+ # @return [RDF::URI]
1521
+ def id;
1522
+ url = table ? table.url : RDF::URI("")
1523
+ url + "#col=#{self.sourceNumber}";
1524
+ end
1525
+
1526
+ # Return Annotated Column representation
1527
+ def to_atd
1528
+ {
1529
+ "@id" => id,
1530
+ "@type" => "Column",
1531
+ "table" => (table.id if table),
1532
+ "number" => self.number,
1533
+ "sourceNumber" => self.sourceNumber,
1534
+ "cells" => [],
1535
+ "virtual" => self.virtual,
1536
+ "name" => self.name,
1537
+ "title" => self.title
1538
+ }
1539
+ end
1540
+
1541
+ # Logic for accessing elements as accessors
1542
+ def method_missing(method, *args)
1543
+ if INHERITED_PROPERTIES.has_key?(method.to_sym)
1544
+ inherited_property_value(method.to_sym)
1545
+ else
1546
+ PROPERTIES.has_key?(method.to_sym) ? object[method.to_sym] : super
1547
+ end
1548
+ end
1549
+ end
1550
+
1551
+ class Dialect < Metadata
1552
+ # Defaults for dialects
1553
+ DIALECT_DEFAULTS = {
1554
+ commentPrefix: nil,
1555
+ delimiter: ",".freeze,
1556
+ doubleQuote: true,
1557
+ encoding: "utf-8".freeze,
1558
+ header: true,
1559
+ headerColumnCount: 0,
1560
+ headerRowCount: 1,
1561
+ lineTerminator: :auto, # SPEC says "\r\n"
1562
+ quoteChar: '"',
1563
+ skipBlankRows: false,
1564
+ skipColumns: 0,
1565
+ skipInitialSpace: false,
1566
+ skipRows: 0,
1567
+ trim: false
1568
+ }.freeze
1569
+
1570
+ PROPERTIES = {
1571
+ :@id => :link,
1572
+ :@type => :atomic,
1573
+ commentPrefix: :atomic,
1574
+ delimiter: :atomic,
1575
+ doubleQuote: :atomic,
1576
+ encoding: :atomic,
1577
+ header: :atomic,
1578
+ headerColumnCount: :atomic,
1579
+ headerRowCount: :atomic,
1580
+ lineTerminator: :atomic,
1581
+ quoteChar: :atomic,
1582
+ skipBlankRows: :atomic,
1583
+ skipColumns: :atomic,
1584
+ skipInitialSpace: :atomic,
1585
+ skipRows: :atomic,
1586
+ trim: :atomic,
1587
+ }.freeze
1588
+
1589
+ REQUIRED = [].freeze
1590
+
1591
+ # Setters
1592
+ PROPERTIES.keys.each do |a|
1593
+ define_method("#{a}=".to_sym) do |value|
1594
+ object[a] = value.to_s =~ /^\d+/ ? value.to_i : value
1595
+ end
1596
+ end
1597
+
1598
+ # escape character
1599
+ # @return [String]
1600
+ def escape_character
1601
+ self.doubleQuote ? '"' : '\\'
1602
+ end
1603
+
1604
+ # default for headerRowCount is zero if header is false
1605
+ # @return [Integer]
1606
+ def headerRowCount
1607
+ object.fetch(:headerRowCount, self.header ? 1 : 0)
1608
+ end
1609
+
1610
+ # default for trim comes from skipInitialSpace
1611
+ # @return [Boolean, String]
1612
+ def trim
1613
+ object.fetch(:trim, self.skipInitialSpace ? 'start' : false)
1614
+ end
1615
+
1616
+ ##
1617
+ # Extract a new Metadata document from the file or data provided
1618
+ #
1619
+ # @param [#read, #to_s] input IO, or file path or URL
1620
+ # @param [Hash{Symbol => Object}] options
1621
+ # any additional options (see `RDF::Util::File.open_file`)
1622
+ # @return [Metadata] Tabular metadata
1623
+ # @see http://w3c.github.io/csvw/syntax/#parsing
1624
+ def embedded_metadata(input, options = {})
1625
+ options = options.dup
1626
+ options.delete(:context) # Don't accidentally use a passed context
1627
+ # Normalize input to an IO object
1628
+ if !input.respond_to?(:read)
1629
+ return ::RDF::Util::File.open_file(input.to_s) {|f| embedded_metadata(f, options.merge(base: input.to_s))}
1630
+ end
1631
+
1632
+ table = {
1633
+ "url" => (options.fetch(:base, "")),
1634
+ "@type" => "Table",
1635
+ "tableSchema" => {
1636
+ "@type" => "Schema",
1637
+ "columns" => []
1638
+ }
1639
+ }
1640
+
1641
+ # Set encoding on input
1642
+ csv = ::CSV.new(input, csv_options)
1643
+ (1..skipRows.to_i).each do
1644
+ value = csv.shift.join(delimiter) # Skip initial lines, these form comment annotations
1645
+ # Trim value
1646
+ value.lstrip! if %w(true start).include?(trim.to_s)
1647
+ value.rstrip! if %w(true end).include?(trim.to_s)
1648
+
1649
+ value = value[1..-1].strip if commentPrefix && value.start_with?(commentPrefix)
1650
+ (table["rdfs:comment"] ||= []) << value unless value.empty?
1651
+ end
1652
+ debug("embedded_metadata") {"notes: #{table["notes"].inspect}"}
1653
+
1654
+ (1..headerRowCount).each do
1655
+ row_data = Array(csv.shift)
1656
+ Array(row_data).each_with_index do |value, index|
1657
+ # Skip columns
1658
+ skipCols = skipColumns.to_i + headerColumnCount.to_i
1659
+ next if index < skipCols
1660
+
1661
+ # Trim value
1662
+ value.lstrip! if %w(true start).include?(trim.to_s)
1663
+ value.rstrip! if %w(true end).include?(trim.to_s)
1664
+
1665
+ # Initialize title
1666
+ # SPEC CONFUSION: does title get an array, or concatenated values?
1667
+ columns = table["tableSchema"]["columns"] ||= []
1668
+ column = columns[index - skipCols] ||= {
1669
+ "title" => {"und" => []},
1670
+ }
1671
+ column["title"]["und"] << value
1672
+ end
1673
+ end
1674
+ debug("embedded_metadata") {"table: #{table.inspect}"}
1675
+ input.rewind if input.respond_to?(:rewind)
1676
+
1677
+ Table.new(table, options.merge(reason: "load embedded metadata: #{table['@id']}"))
1678
+ end
1679
+
1680
+ # Logic for accessing elements as accessors
1681
+ def method_missing(method, *args)
1682
+ if DIALECT_DEFAULTS.has_key?(method.to_sym)
1683
+ # As set, or with default
1684
+ object.fetch(method.to_sym, DIALECT_DEFAULTS[method.to_sym])
1685
+ else
1686
+ super
1687
+ end
1688
+ end
1689
+ end
1690
+
1691
+ # Wraps each resulting row
1692
+ class Row
1693
+ # Class for returning values
1694
+ Cell = Struct.new(:table, :column, :row, :stringValue, :aboutUrl, :propertyUrl, :valueUrl, :value, :errors) do
1695
+ def set_urls(mapped_values)
1696
+ %w(aboutUrl propertyUrl valueUrl).each do |prop|
1697
+ # If the cell value is nil, and it is not a virtual column
1698
+ next if prop == "valueUrl" && value.nil? && !column.virtual
1699
+ if v = column.send(prop.to_sym)
1700
+ t = Addressable::Template.new(v)
1701
+ mapped = t.expand(mapped_values).to_s
1702
+ # FIXME: don't expand here, do it in CSV2RDF
1703
+ url = row.context.expand_iri(mapped, documentRelative: true)
1704
+ self.send("#{prop}=".to_sym, url)
1705
+ end
1706
+ end
1707
+ end
1708
+
1709
+ def valid?; Array(errors).empty?; end
1710
+ def to_s; value.to_s; end
1711
+
1712
+ # Identifier for this Cell, as an RFC7111 fragment
1713
+ # @return [RDF::URI]
1714
+ def id; table.url + "#cell=#{self.row.sourceNumber},#{self.column.sourceNumber}"; end
1715
+
1716
+ # Return Annotated Cell representation
1717
+ def to_atd
1718
+ {
1719
+ "@id" => self.id,
1720
+ "@type" => "Cell",
1721
+ "column" => column.id,
1722
+ "row" => row.id,
1723
+ "stringValue" => self.stringValue,
1724
+ "value" => self.value,
1725
+ "errors" => self.errors
1726
+ }
1727
+ end
1728
+ end
1729
+
1730
+ # Row values, hashed by `name`
1731
+ attr_reader :values
1732
+
1733
+ # Row number of this row
1734
+ # @return [Integer]
1735
+ attr_reader :number
1736
+
1737
+ # Row number of this row from the original source
1738
+ # @return [Integer]
1739
+ attr_reader :sourceNumber
1740
+
1741
+ #
1742
+ # Table containing this row
1743
+ # @return [Table]
1744
+ attr_reader :table
1745
+
1746
+ #
1747
+ # Context from Table with base set to table URL for expanding URI Templates
1748
+ # @return [JSON::LD::Context]
1749
+ attr_reader :context
1750
+
1751
+ ##
1752
+ # @param [Array<Array<String>>] row
1753
+ # @param [Metadata] metadata for Table
1754
+ # @param [Integer] number 1-based row number after skipped/header rows
1755
+ # @param [Integer] source_number 1-based row number from source
1756
+ # @return [Row]
1757
+ def initialize(row, metadata, number, source_number)
1758
+ @table = metadata
1759
+ @number = number
1760
+ @sourceNumber = source_number
1761
+ @values = []
1762
+ skipColumns = metadata.dialect.skipColumns.to_i + metadata.dialect.headerColumnCount.to_i
1763
+
1764
+ @context = table.context.dup
1765
+ @context.base = table.url
1766
+
1767
+ # Create values hash
1768
+ # SPEC CONFUSION: are values pre-or-post conversion?
1769
+ map_values = {"_row" => number, "_sourceRow" => source_number}
1770
+
1771
+ columns = metadata.tableSchema.columns ||= []
1772
+
1773
+ # Make sure that the row length is at least as long as the number of column definitions, to implicitly include virtual columns
1774
+ columns.each_with_index {|c, index| row[index] ||= (c.null || '')}
1775
+ row.each_with_index do |value, index|
1776
+
1777
+ next if index < skipColumns
1778
+
1779
+ cell_errors = []
1780
+
1781
+ # create column if necessary
1782
+ columns[index - skipColumns] ||=
1783
+ Column.new({}, table: metadata, parent: metadata.tableSchema, number: index + 1 - skipColumns)
1784
+
1785
+ column = columns[index - skipColumns]
1786
+
1787
+ @values << cell = Cell.new(metadata, column, self, value)
1788
+
1789
+ datatype = metadata.normalize_datatype(column.datatype || 'string')
1790
+ value = value.gsub(/\r\t\a/, ' ') unless %w(string json xml html anyAtomicType any).include?(datatype[:base])
1791
+ value = value.strip.gsub(/\s+/, ' ') unless %w(string json xml html anyAtomicType any normalizedString).include?(datatype[:base])
1792
+ # if the resulting string is an empty string, apply the remaining steps to the string given by the default property
1793
+ value = column.default || '' if value.empty?
1794
+
1795
+ cell_values = column.separator ? value.split(column.separator) : [value]
1796
+
1797
+ cell_values = cell_values.map do |v|
1798
+ v = v.strip unless %w(string anyAtomicType any).include?(datatype[:base])
1799
+ v = column.default || '' if v.empty?
1800
+ if Array(column.null).include?(v)
1801
+ nil
1802
+ else
1803
+ # Trim value
1804
+ if %w(string anyAtomicType any).include?(datatype[:base])
1805
+ v.lstrip! if %w(true start).include?(metadata.dialect.trim.to_s)
1806
+ v.rstrip! if %w(true end).include?(metadata.dialect.trim.to_s)
1807
+ else
1808
+ # unless the datatype is string or anyAtomicType or any, strip leading and trailing whitespace from the string value
1809
+ v.strip!
1810
+ end
1811
+
1812
+ expanded_dt = metadata.context.expand_iri(datatype[:base], vocab: true)
1813
+ if (lit_or_errors = value_matching_datatype(v.dup, datatype, expanded_dt, column.lang)).is_a?(RDF::Literal)
1814
+ lit_or_errors
1815
+ else
1816
+ cell_errors += lit_or_errors
1817
+ RDF::Literal(v, language: column.lang)
1818
+ end
1819
+ end
1820
+ end.compact
1821
+
1822
+ cell.value = (column.separator ? cell_values : cell_values.first)
1823
+ cell.errors = cell_errors
1824
+ metadata.send(:debug, "#{self.number}: each_cell ##{self.sourceNumber},#{cell.column.sourceNumber}", cell.errors.join("\n")) unless cell_errors.empty?
1825
+
1826
+ map_values[columns[index - skipColumns].name] = (column.separator ? cell_values.map(&:to_s) : cell_values.first.to_s)
1827
+ end
1828
+
1829
+ # Map URLs for row
1830
+ @values.each_with_index do |cell, index|
1831
+ mapped_values = map_values.merge(
1832
+ "_name" => URI.decode(cell.column.name),
1833
+ "_column" => cell.column.number,
1834
+ "_sourceColumn" => cell.column.sourceNumber
1835
+ )
1836
+ cell.set_urls(mapped_values)
1837
+ end
1838
+ end
1839
+
1840
+ # Identifier for this row, as an RFC7111 fragment
1841
+ # @return [RDF::URI]
1842
+ def id; table.url + "#row=#{self.sourceNumber}"; end
1843
+
1844
+ # Return Annotated Row representation
1845
+ def to_atd
1846
+ {
1847
+ "@id" => self.id,
1848
+ "@type" => "Row",
1849
+ "table" => table.id,
1850
+ "number" => self.number,
1851
+ "sourceNumber" => self.sourceNumber,
1852
+ "cells" => @values.map(&:to_atd)
1853
+ }
1854
+ end
1855
+
1856
+ private
1857
+ #
1858
+ # given a datatype specification, return a literal matching that specififcation, if found, otherwise nil
1859
+ # @return [RDF::Literal]
1860
+ def value_matching_datatype(value, datatype, expanded_dt, language)
1861
+ value_errors = []
1862
+
1863
+ # Check constraints
1864
+ if datatype[:length] && value.length != datatype[:length]
1865
+ value_errors << "#{value} does not have length #{datatype[:length]}"
1866
+ end
1867
+ if datatype[:minLength] && value.length < datatype[:minLength]
1868
+ value_errors << "#{value} does not have length >= #{datatype[:minLength]}"
1869
+ end
1870
+ if datatype[:maxLength] && value.length > datatype[:maxLength]
1871
+ value_errors << "#{value} does not have length <= #{datatype[:maxLength]}"
1872
+ end
1873
+
1874
+ format = datatype[:format]
1875
+ # Datatype specific constraints and conversions
1876
+ case datatype[:base].to_sym
1877
+ when :decimal, :integer, :long, :int, :short, :byte,
1878
+ :nonNegativeInteger, :positiveInteger,
1879
+ :unsignedLong, :unsignedInt, :unsignedShort, :unsignedByte,
1880
+ :nonPositiveInteger, :negativeInteger,
1881
+ :double, :float, :number
1882
+ # Normalize representation based on numeric-specific facets
1883
+ groupChar = datatype.fetch(:groupChar, ',')
1884
+ if datatype[:pattern] && !value.match(Regexp.new(datatype[:pattern]))
1885
+ # pattern facet failed
1886
+ value_errors << "#{value} does not match pattern #{datatype[:pattern]}"
1887
+ end
1888
+ if value.include?(groupChar*2)
1889
+ # pattern facet failed
1890
+ value_errors << "#{value} has repeating #{groupChar.inspect}"
1891
+ end
1892
+ value.gsub!(groupChar, '')
1893
+ value.sub!(datatype.fetch(:decimalChar, '.'), '.')
1894
+
1895
+ # Extract percent or per-mille sign
1896
+ percent = permille = false
1897
+ case value
1898
+ when /%$/
1899
+ value = value[0..-2]
1900
+ percent = true
1901
+ when /‰$/
1902
+ value = value[0..-2]
1903
+ permille = true
1904
+ end
1905
+
1906
+ lit = RDF::Literal(value, datatype: expanded_dt)
1907
+ if percent || permille
1908
+ o = lit.object
1909
+ o = o / 100 if percent
1910
+ o = o / 1000 if permille
1911
+ lit = RDF::Literal(o, datatype: expanded_dt)
1912
+ end
1913
+ when :boolean
1914
+ lit = if format
1915
+ # True/False determined by Y|N values
1916
+ t, f = format.to_s.split('|', 2)
1917
+ case
1918
+ when value == t
1919
+ value = RDF::Literal::TRUE
1920
+ when value == f
1921
+ value = RDF::Literal::FALSE
1922
+ else
1923
+ value_errors << "#{value} does not match boolean format #{format}"
1924
+ RDF::Literal::Boolean.new(value)
1925
+ end
1926
+ else
1927
+ if %w(1 true).include?(value.downcase)
1928
+ RDF::Literal::TRUE
1929
+ elsif %w(0 false).include?(value.downcase)
1930
+ RDF::Literal::FALSE
1931
+ end
1932
+ end
1933
+ when :date, :time, :dateTime, :dateTimeStamp, :datetime
1934
+ # Match values
1935
+ tz, date_format, time_format = nil, nil, nil
1936
+
1937
+ # Extract tz info
1938
+ if format && (md = format.match(/^(.*[dyms])+(\s*[xX]{1,5})$/))
1939
+ format, tz = md[1], md[2]
1940
+ end
1941
+
1942
+ if format
1943
+ date_format, time_format = format.split(' ')
1944
+ if datatype[:base].to_sym == :time
1945
+ date_format, time_format = nil, date_format
1946
+ end
1947
+
1948
+ # Extract date, of specified
1949
+ date_part = case date_format
1950
+ when 'yyyy-MM-dd' then value.match(/^(?<yr>\d{4})-(?<mo>\d{2})-(?<da>\d{2})/)
1951
+ when 'yyyyMMdd' then value.match(/^(?<yr>\d{4})(?<mo>\d{2})(?<da>\d{2})/)
1952
+ when 'dd-MM-yyyy' then value.match(/^(?<da>\d{2})-(?<mo>\d{2})-(?<yr>\d{4})/)
1953
+ when 'd-M-yyyy' then value.match(/^(?<da>\d{1,2})-(?<mo>\d{1,2})-(?<yr>\d{4})/)
1954
+ when 'MM-dd-yyyy' then value.match(/^(?<mo>\d{2})-(?<da>\d{2})-(?<yr>\d{4})/)
1955
+ when 'M-d-yyyy' then value.match(/^(?<mo>\d{1,2})-(?<da>\d{1,2})-(?<yr>\d{4})/)
1956
+ when 'dd/MM/yyyy' then value.match(/^(?<da>\d{2})\/(?<mo>\d{2})\/(?<yr>\d{4})/)
1957
+ when 'd/M/yyyy' then value.match(/^(?<da>\d{1,2})\/(?<mo>\d{1,2})\/(?<yr>\d{4})/)
1958
+ when 'MM/dd/yyyy' then value.match(/^(?<mo>\d{2})\/(?<da>\d{2})\/(?<yr>\d{4})/)
1959
+ when 'M/d/yyyy' then value.match(/^(?<mo>\d{1,2})\/(?<da>\d{1,2})\/(?<yr>\d{4})/)
1960
+ when 'dd.MM.yyyy' then value.match(/^(?<da>\d{2})\.(?<mo>\d{2})\.(?<yr>\d{4})/)
1961
+ when 'd.M.yyyy' then value.match(/^(?<da>\d{1,2})\.(?<mo>\d{1,2})\.(?<yr>\d{4})/)
1962
+ when 'MM.dd.yyyy' then value.match(/^(?<mo>\d{2})\.(?<da>\d{2})\.(?<yr>\d{4})/)
1963
+ when 'M.d.yyyy' then value.match(/^(?<mo>\d{1,2})\.(?<da>\d{1,2})\.(?<yr>\d{4})/)
1964
+ when 'yyyy-MM-ddTHH:mm:ss' then value.match(/^(?<yr>\d{4})-(?<mo>\d{2})-(?<da>\d{2})T(?<hr>\d{2}):(?<mi>\d{2}):(?<se>\d{2})/)
1965
+ when 'yyyy-MM-ddTHH:mm' then value.match(/^(?<yr>\d{4})-(?<mo>\d{2})-(?<da>\d{2})T(?<hr>\d{2}):(?<mi>\d{2})(?<se>)/)
1966
+ else
1967
+ value_errors << "unrecognized date/time format #{date_format}" if date_format
1968
+ nil
1969
+ end
1970
+
1971
+ # Forward past date part
1972
+ if date_part
1973
+ value = value[date_part.to_s.length..-1]
1974
+ value = value.lstrip if date_part && value.start_with?(' ')
1975
+ end
1976
+
1977
+ # Extract time, of specified
1978
+ time_part = case time_format
1979
+ when 'HH:mm:ss' then value.match(/^(?<hr>\d{2}):(?<mi>\d{2}):(?<se>\d{2})/)
1980
+ when 'HHmmss' then value.match(/^(?<hr>\d{2})(?<mi>\d{2})(?<se>\d{2})/)
1981
+ when 'HH:mm' then value.match(/^(?<hr>\d{2}):(?<mi>\d{2})(?<se>)/)
1982
+ when 'HHmm' then value.match(/^(?<hr>\d{2})(?<mi>\d{2})(?<se>)/)
1983
+ else
1984
+ value_errors << "unrecognized date/time format #{time_format}" if time_format
1985
+ nil
1986
+ end
1987
+
1988
+ # Forward past time part
1989
+ value = value[time_part.to_s.length..-1] if time_part
1990
+
1991
+ # Use datetime match for time
1992
+ time_part = date_part if date_part && date_part.names.include?("hr")
1993
+
1994
+ # If there's a timezone, it may optionally start with whitespace
1995
+ value = value.lstrip if tz.to_s.start_with?(' ')
1996
+ tz_part = value if tz
1997
+
1998
+ # Compose normalized value
1999
+ vd = ("%04d-%02d-%02d" % [date_part[:yr], date_part[:mo], date_part[:da]]) if date_part
2000
+ vt = ("%02d:%02d:%02d" % [time_part[:hr], time_part[:mi], time_part[:se].to_i]) if time_part
2001
+ value = [vd, vt].compact.join('T')
2002
+ value += tz_part.to_s
2003
+ end
2004
+
2005
+ lit = RDF::Literal(value, datatype: expanded_dt)
2006
+ when :duration, :dayTimeDuration, :yearMonthDuration
2007
+ # SPEC CONFUSION: surely format also includes that for other duration types?
2008
+ lit = RDF::Literal(value, datatype: expanded_dt)
2009
+ when :anyType, :anySimpleType, :ENTITIES, :IDREFS, :NMTOKENS,
2010
+ :ENTITY, :ID, :IDREF, :NOTATION
2011
+ value_errors << "#{value} uses unsupported datatype: #{datatype[:base]}"
2012
+ else
2013
+ # For other types, format is a regexp
2014
+ unless format.nil? || value.match(Regexp.new(format))
2015
+ value_errors << "#{value} does not match format #{format}"
2016
+ end
2017
+ lit = if value_errors.empty?
2018
+ if expanded_dt == RDF::XSD.string
2019
+ # Type string will still use language
2020
+ RDF::Literal(value, language: language)
2021
+ else
2022
+ RDF::Literal(value, datatype: expanded_dt)
2023
+ end
2024
+ end
2025
+ end
2026
+
2027
+ # Final value is a valid literal, or a plain literal otherwise
2028
+ value_errors << "#{value} is not a valid #{datatype[:base]}" if lit && !lit.valid?
2029
+
2030
+ # FIXME Value constraints
2031
+
2032
+ value_errors.empty? ? lit : value_errors
2033
+ end
2034
+ end
2035
+
2036
+ # Metadata errors detected
2037
+ class Error < StandardError; end
2038
+ end