rdf-tabular 0.3.0 → 0.4.0.beta2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +3 -2
- data/VERSION +1 -1
- data/lib/rdf/tabular.rb +0 -1
- data/lib/rdf/tabular/format.rb +16 -0
- data/lib/rdf/tabular/metadata.rb +223 -191
- data/lib/rdf/tabular/reader.rb +84 -133
- data/lib/rdf/tabular/uax35.rb +0 -2
- data/spec/format_spec.rb +34 -0
- data/spec/matchers.rb +3 -78
- data/spec/metadata_spec.rb +150 -80
- data/spec/reader_spec.rb +27 -24
- data/spec/spec_helper.rb +4 -3
- data/spec/suite_helper.rb +1 -1
- data/spec/suite_spec.rb +8 -9
- metadata +109 -60
- data/lib/rdf/tabular/utils.rb +0 -33
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: b34ec5c872bbf6e8d8f13559b255283cd118cd46
|
4
|
+
data.tar.gz: 08ace967385cb72fdc48e48ad434f7e0bd35753d
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 55a2305ce14c365631a1f7ad178e4b2c603ef2279c74537cb36f33a47ef81a19b250b1cad64719be5c1921536e06ee2ba4bd2fa2745dff81bfe652ed31ed823d
|
7
|
+
data.tar.gz: 9a3b83c57938b94ebf1ab86052a0bccd144c9380394150748a6e580bf480e1bf39f835a9b2ff2633b3d6ca09823b782c945f3a40fbeec653d6c0ae61218805af
|
data/README.md
CHANGED
@@ -13,6 +13,7 @@ RDF::Tabular parses CSV or other Tabular Data into [RDF][] and JSON using the [W
|
|
13
13
|
|
14
14
|
* Parses [number patterns](http://www.unicode.org/reports/tr35/tr35-39/tr35-numbers.html#Number_Patterns) from [UAX35][]
|
15
15
|
* Parses [date formats](http://www.unicode.org/reports/tr35/tr35-39/tr35-dates.html#Contents) from [UAX35][]
|
16
|
+
* Returns detailed errors and warnings using optional `Logger`.
|
16
17
|
|
17
18
|
## Installation
|
18
19
|
Install with `gem install rdf-tabular`
|
@@ -247,8 +248,8 @@ Full documentation available on [RubyDoc](http://rubydoc.info/gems/rdf-tabular/f
|
|
247
248
|
* {RDF::Tabular::Reader}
|
248
249
|
|
249
250
|
## Dependencies
|
250
|
-
* [Ruby](http://ruby-lang.org/) (>= 2.0
|
251
|
-
* [RDF.rb](http://rubygems.org/gems/rdf) (>=
|
251
|
+
* [Ruby](http://ruby-lang.org/) (>= 2.0)
|
252
|
+
* [RDF.rb](http://rubygems.org/gems/rdf) (>= 2.0)
|
252
253
|
* [JSON](https://rubygems.org/gems/json) (>= 1.5)
|
253
254
|
|
254
255
|
## Installation
|
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.
|
1
|
+
0.4.0.beta2
|
data/lib/rdf/tabular.rb
CHANGED
@@ -11,7 +11,6 @@ module RDF
|
|
11
11
|
# @author [Gregg Kellogg](http://greggkellogg.net/)
|
12
12
|
module Tabular
|
13
13
|
require 'rdf/tabular/format'
|
14
|
-
require 'rdf/tabular/utils'
|
15
14
|
autoload :Column, 'rdf/tabular/metadata'
|
16
15
|
autoload :CSVW, 'rdf/tabular/csvw'
|
17
16
|
autoload :Dialect, 'rdf/tabular/metadata'
|
data/lib/rdf/tabular/format.rb
CHANGED
@@ -46,5 +46,21 @@ module RDF::Tabular
|
|
46
46
|
def self.detect(sample)
|
47
47
|
!!sample.match(/^(?:(?:\w )+,(?:\w ))$/)
|
48
48
|
end
|
49
|
+
|
50
|
+
##
|
51
|
+
# Hash of CLI commands appropriate for this format
|
52
|
+
# @return [Hash{Symbol => Lambda(Array, Hash)}]
|
53
|
+
def self.cli_commands
|
54
|
+
{
|
55
|
+
:"tabular-json" => ->(argv, opts) do
|
56
|
+
raise ArgumentError, "Outputting Tabular JSON only allowed when input format is tabular." unless opts[:format] == :tabular
|
57
|
+
out = opts[:output] || $stdout
|
58
|
+
out.set_encoding(Encoding::UTF_8) if RUBY_PLATFORM == "java"
|
59
|
+
RDF::CLI.parse(argv, opts) do |reader|
|
60
|
+
out.puts reader.to_json
|
61
|
+
end
|
62
|
+
end
|
63
|
+
}
|
64
|
+
end
|
49
65
|
end
|
50
66
|
end
|
data/lib/rdf/tabular/metadata.rb
CHANGED
@@ -19,16 +19,12 @@ require 'yaml' # used by BCP47, which should have required it.
|
|
19
19
|
# @author [Gregg Kellogg](http://greggkellogg.net/)
|
20
20
|
module RDF::Tabular
|
21
21
|
class Metadata
|
22
|
-
include
|
22
|
+
include RDF::Util::Logger
|
23
23
|
|
24
24
|
# Hash representation
|
25
25
|
# @return [Hash<Symbol,Object>]
|
26
26
|
attr_accessor :object
|
27
27
|
|
28
|
-
# Warnings detected on initialization or when setting properties
|
29
|
-
# @return [Array<String>]
|
30
|
-
attr_accessor :warnings
|
31
|
-
|
32
28
|
# Inheritect properties, valid for all types
|
33
29
|
INHERITED_PROPERTIES = {
|
34
30
|
aboutUrl: :uri_template,
|
@@ -179,7 +175,6 @@ module RDF::Tabular
|
|
179
175
|
# @return [Metadata]
|
180
176
|
def self.for_input(input, options = {})
|
181
177
|
base = options[:base]
|
182
|
-
warnings = options.fetch(:warnings, [])
|
183
178
|
|
184
179
|
# Use user metadata, if provided
|
185
180
|
metadata = case options[:metadata]
|
@@ -202,10 +197,7 @@ module RDF::Tabular
|
|
202
197
|
if md.describes_file?(base)
|
203
198
|
metadata = md
|
204
199
|
else
|
205
|
-
|
206
|
-
if options[:validate] && !options[:warnings]
|
207
|
-
$stderr.puts "Warnings: #{warnings.join("\n")}"
|
208
|
-
end
|
200
|
+
log_warn("Found metadata at #{link_loc}, which does not describe #{base}, ignoring", options)
|
209
201
|
end
|
210
202
|
end
|
211
203
|
end
|
@@ -214,12 +206,12 @@ module RDF::Tabular
|
|
214
206
|
# If we still don't have metadata, load the site-wide configuration file and use templates found there as locations
|
215
207
|
if !metadata && base
|
216
208
|
templates = site_wide_config(base)
|
217
|
-
|
209
|
+
log_debug("for_input", options) {"templates: #{templates.map(&:to_s).inspect}"}
|
218
210
|
locs = templates.map do |template|
|
219
211
|
t = Addressable::Template.new(template)
|
220
212
|
RDF::URI(base).join(t.expand(url: base).to_s)
|
221
213
|
end
|
222
|
-
|
214
|
+
log_debug("for_input", options) {"locs: #{locs.map(&:to_s).inspect}"}
|
223
215
|
|
224
216
|
locs.each do |loc|
|
225
217
|
metadata ||= begin
|
@@ -230,15 +222,12 @@ module RDF::Tabular
|
|
230
222
|
if md.describes_file?(base)
|
231
223
|
md
|
232
224
|
else
|
233
|
-
|
234
|
-
if options[:validate] && !options[:warnings]
|
235
|
-
$stderr.puts "Warnings: #{warnings.join("\n")}"
|
236
|
-
end
|
225
|
+
log_warn("Found metadata at #{loc}, which does not describe #{base}, ignoring", options)
|
237
226
|
nil
|
238
227
|
end
|
239
228
|
end
|
240
229
|
rescue IOError
|
241
|
-
|
230
|
+
log_debug("for_input", options) {"failed to load found metadata #{loc}: #{$!}"}
|
242
231
|
nil
|
243
232
|
end
|
244
233
|
end
|
@@ -331,7 +320,6 @@ module RDF::Tabular
|
|
331
320
|
# @return [Metadata]
|
332
321
|
def initialize(input, options = {})
|
333
322
|
@options = options.dup
|
334
|
-
@options[:depth] ||= 0
|
335
323
|
|
336
324
|
# Parent of this Metadata, if any
|
337
325
|
@parent = @options[:parent]
|
@@ -344,14 +332,14 @@ module RDF::Tabular
|
|
344
332
|
|
345
333
|
@context = case input['@context']
|
346
334
|
when Array
|
347
|
-
|
335
|
+
log_warn "Context missing required value 'http://www.w3.org/ns/csvw'" unless input['@context'].include?('http://www.w3.org/ns/csvw')
|
348
336
|
c = LOCAL_CONTEXT.dup
|
349
337
|
c.base = RDF::URI(opt_base)
|
350
338
|
obj = input['@context'].detect {|e| e.is_a?(Hash)} || {}
|
351
339
|
raise Error, "@context has object with properties other than @base and @language" unless (obj.keys.map(&:to_s) - %w(@base @language)).empty?
|
352
340
|
c.parse(obj)
|
353
341
|
when Hash
|
354
|
-
|
342
|
+
log_warn "Context missing required value 'http://www.w3.org/ns/csvw'"
|
355
343
|
c = LOCAL_CONTEXT.dup
|
356
344
|
c.base = RDF::URI(opt_base)
|
357
345
|
c.parse(input['@context'])
|
@@ -362,7 +350,7 @@ module RDF::Tabular
|
|
362
350
|
c
|
363
351
|
else
|
364
352
|
if self.is_a?(TableGroup) || self.is_a?(Table) && !@parent
|
365
|
-
|
353
|
+
log_warn "Context missing required value 'http://www.w3.org/ns/csvw'"
|
366
354
|
LOCAL_CONTEXT.dup
|
367
355
|
c = LOCAL_CONTEXT.dup
|
368
356
|
c.base = RDF::URI(opt_base)
|
@@ -375,7 +363,7 @@ module RDF::Tabular
|
|
375
363
|
@options[:base] = @context ? @context.base : RDF::URI(opt_base)
|
376
364
|
|
377
365
|
if @context && @context.default_language && !BCP47::Language.identify(@context.default_language.to_s)
|
378
|
-
|
366
|
+
log_warn "Context has invalid @language (#{@context.default_language.inspect}): expected valid BCP47 language tag"
|
379
367
|
@context.default_language = nil
|
380
368
|
end
|
381
369
|
|
@@ -385,7 +373,7 @@ module RDF::Tabular
|
|
385
373
|
|
386
374
|
@object = {}
|
387
375
|
|
388
|
-
|
376
|
+
log_depth do
|
389
377
|
# Input was parsed in .new
|
390
378
|
# Metadata is object with symbolic keys
|
391
379
|
input.each do |key, value|
|
@@ -401,7 +389,7 @@ module RDF::Tabular
|
|
401
389
|
object[:@id] = if value.is_a?(String)
|
402
390
|
value
|
403
391
|
else
|
404
|
-
|
392
|
+
log_warn "#{type} has invalid property '@id' (#{value.inspect}): expected a string"
|
405
393
|
"" # Default value
|
406
394
|
end
|
407
395
|
@id = @options[:base].join(object[:@id])
|
@@ -426,9 +414,9 @@ module RDF::Tabular
|
|
426
414
|
end
|
427
415
|
|
428
416
|
if reason
|
429
|
-
|
430
|
-
|
431
|
-
|
417
|
+
log_debug("md#initialize") {reason}
|
418
|
+
log_debug("md#initialize") {"filenames: #{filenames}"}
|
419
|
+
log_debug("md#initialize") {"#{inspect}, parent: #{!@parent.nil?}, context: #{!@context.nil?}"} unless is_a?(Dialect)
|
432
420
|
end
|
433
421
|
end
|
434
422
|
|
@@ -487,7 +475,7 @@ module RDF::Tabular
|
|
487
475
|
when Schema
|
488
476
|
value
|
489
477
|
else
|
490
|
-
|
478
|
+
log_warn "#{type} has invalid property 'tableSchema' (#{value.inspect}): expected a URL or object"
|
491
479
|
Schema.new({}, @options.merge(parent: self, context: nil))
|
492
480
|
end
|
493
481
|
end
|
@@ -534,7 +522,7 @@ module RDF::Tabular
|
|
534
522
|
when Dialect
|
535
523
|
value
|
536
524
|
else
|
537
|
-
|
525
|
+
log_warn "#{type} has invalid property 'dialect' (#{value.inspect}): expected a URL or object"
|
538
526
|
nil
|
539
527
|
end
|
540
528
|
end
|
@@ -544,15 +532,15 @@ module RDF::Tabular
|
|
544
532
|
# @raise [Error] if datatype is not valid
|
545
533
|
def datatype=(value)
|
546
534
|
val = case value
|
547
|
-
when Hash then Datatype.new(value, parent: self)
|
548
|
-
else Datatype.new({base: value}, parent: self)
|
535
|
+
when Hash then Datatype.new(value, @options.merge(parent: self))
|
536
|
+
else Datatype.new({base: value}, @options.merge(parent: self))
|
549
537
|
end
|
550
538
|
|
551
539
|
if val.valid? || value.is_a?(Hash)
|
552
540
|
# Set it if it was specified as an object, which may cause validation errors later
|
553
541
|
object[:datatype] = val
|
554
542
|
else
|
555
|
-
|
543
|
+
log_warn "#{type} has invalid property 'datatype': expected a built-in or an object"
|
556
544
|
end
|
557
545
|
end
|
558
546
|
|
@@ -567,40 +555,20 @@ module RDF::Tabular
|
|
567
555
|
##
|
568
556
|
# Do we have valid metadata?
|
569
557
|
def valid?
|
570
|
-
validate
|
571
|
-
|
572
|
-
rescue
|
573
|
-
false
|
574
|
-
end
|
575
|
-
|
576
|
-
##
|
577
|
-
# Validation errors
|
578
|
-
# @return [Array<String>]
|
579
|
-
def errors
|
580
|
-
validate! && []
|
581
|
-
rescue Error => e
|
582
|
-
e.message.split("\n")
|
558
|
+
validate # Possibly re-validate
|
559
|
+
!log_statistics[:error]
|
583
560
|
end
|
584
561
|
|
585
|
-
|
586
|
-
|
587
|
-
# @return [Array<String>]
|
588
|
-
def warnings
|
589
|
-
((@warnings || []) + object.
|
590
|
-
values.
|
591
|
-
flatten.
|
592
|
-
select {|v| v.is_a?(Metadata)}.
|
593
|
-
map(&:warnings).
|
594
|
-
flatten).compact.uniq
|
562
|
+
def validate!
|
563
|
+
raise Error, "Metadata error" unless valid?
|
595
564
|
end
|
596
565
|
|
597
566
|
##
|
598
567
|
# Validate metadata, raising an error containing all errors detected during validation
|
599
568
|
# @raise [Error] Raise error if metadata has any unexpected properties
|
600
569
|
# @return [self]
|
601
|
-
def validate
|
570
|
+
def validate
|
602
571
|
expected_props, required_props = @properties.keys, @required
|
603
|
-
errors = []
|
604
572
|
|
605
573
|
unless is_a?(Dialect) || is_a?(Transformation)
|
606
574
|
expected_props = expected_props + INHERITED_PROPERTIES.keys
|
@@ -609,10 +577,10 @@ module RDF::Tabular
|
|
609
577
|
# It has only expected properties (exclude metadata)
|
610
578
|
check_keys = object.keys - [:"@id", :"@context"]
|
611
579
|
check_keys = check_keys.reject {|k| k.to_s.include?(':')} unless is_a?(Dialect)
|
612
|
-
|
580
|
+
log_warn "#{type} has unexpected keys: #{(check_keys - expected_props).map(&:to_s)}" unless check_keys.all? {|k| expected_props.include?(k)}
|
613
581
|
|
614
582
|
# It has required properties
|
615
|
-
|
583
|
+
log_error "#{type} missing required keys: #{(required_props - check_keys).map(&:to_s)}" unless (required_props & check_keys) == required_props
|
616
584
|
|
617
585
|
self.normalize!
|
618
586
|
|
@@ -621,55 +589,49 @@ module RDF::Tabular
|
|
621
589
|
value = object[key]
|
622
590
|
case key
|
623
591
|
when :base
|
624
|
-
|
592
|
+
log_error "#{type} has invalid base: #{value.inspect}" unless DATATYPES.keys.map(&:to_s).include?(value)
|
625
593
|
when :columns
|
626
|
-
value.each do |
|
627
|
-
|
628
|
-
|
629
|
-
rescue Error => e
|
630
|
-
errors << e.message
|
631
|
-
end
|
594
|
+
value.each do |col|
|
595
|
+
col.validate
|
596
|
+
log_statistics.merge!(col.log_statistics)
|
632
597
|
end
|
633
598
|
column_names = value.map(&:name)
|
634
|
-
|
599
|
+
log_error "#{type} has invalid property '#{key}': must have unique names: #{column_names.inspect}" unless column_names.uniq == column_names
|
635
600
|
when :datatype, :dialect, :tables, :tableSchema, :transformations
|
636
601
|
Array(value).each do |t|
|
637
602
|
# Make sure value is of appropriate class
|
638
603
|
if t.is_a?({datatype: Datatype, dialect: Dialect, tables: Table, tableSchema: Schema, transformations: Transformation}[key])
|
639
|
-
|
640
|
-
|
641
|
-
rescue Error => e
|
642
|
-
errors << e.message
|
643
|
-
end
|
604
|
+
t.validate
|
605
|
+
log_statistics.merge!(t.log_statistics)
|
644
606
|
else
|
645
|
-
|
607
|
+
log_error "#{type} has invalid property '#{key}': unexpected value #{value.class.name}"
|
646
608
|
end
|
647
609
|
end
|
648
|
-
|
610
|
+
log_error "#{type} has invalid property 'tables': must not be empty" if key == :tables && Array(value).empty?
|
649
611
|
when :foreignKeys
|
650
612
|
# An array of foreign key definitions that define how the values from specified columns within this table link to rows within this table or other tables. A foreign key definition is a JSON object with the properties:
|
651
613
|
value.each do |fk|
|
652
614
|
columnReference, reference = fk['columnReference'], fk['reference']
|
653
|
-
|
654
|
-
|
615
|
+
log_error "#{type} has invalid property '#{key}': missing columnReference and reference" unless columnReference && reference
|
616
|
+
log_error "#{type} has invalid property '#{key}': has extra entries #{fk.keys.inspect}" unless fk.keys.length == 2
|
655
617
|
|
656
618
|
# Verify that columns exist in this schema
|
657
|
-
|
619
|
+
log_error "#{type} has invalid property '#{key}': no columnReference found" unless Array(columnReference).length > 0
|
658
620
|
Array(columnReference).each do |k|
|
659
|
-
|
621
|
+
log_error "#{type} has invalid property '#{key}': columnReference not found #{k}" unless self.columns.any? {|c| c[:name] == k}
|
660
622
|
end
|
661
623
|
|
662
624
|
if reference.is_a?(Hash)
|
663
|
-
|
625
|
+
log_error "#{type} has invalid property '#{key}': reference has extra entries #{reference.keys.inspect}" unless (reference.keys - %w(resource schemaReference columnReference)).empty?
|
664
626
|
ref_cols = reference['columnReference']
|
665
627
|
schema = if reference.has_key?('resource')
|
666
628
|
if reference.has_key?('schemaReference')
|
667
|
-
|
629
|
+
log_error "#{type} has invalid property '#{key}': reference has a schemaReference: #{reference.inspect}"
|
668
630
|
end
|
669
631
|
# resource is the URL of a Table in the TableGroup
|
670
632
|
ref = context.base.join(reference['resource']).to_s
|
671
|
-
table = root.is_a?(TableGroup) && root.tables.detect {|t| t.url == ref}
|
672
|
-
|
633
|
+
table = root.is_a?(TableGroup) && Array(root.tables).detect {|t| t.url == ref}
|
634
|
+
log_error "#{type} has invalid property '#{key}': table referenced by #{ref} not found" unless table
|
673
635
|
table.tableSchema if table
|
674
636
|
elsif reference.has_key?('schemaReference')
|
675
637
|
# resource is the @id of a Schema in the TableGroup
|
@@ -677,25 +639,25 @@ module RDF::Tabular
|
|
677
639
|
tables = root.is_a?(TableGroup) ? root.tables.select {|t| t.tableSchema[:@id] == ref} : []
|
678
640
|
case tables.length
|
679
641
|
when 0
|
680
|
-
|
642
|
+
log_error "#{type} has invalid property '#{key}': schema referenced by #{ref} not found"
|
681
643
|
nil
|
682
644
|
when 1
|
683
645
|
tables.first.tableSchema
|
684
646
|
else
|
685
|
-
|
647
|
+
log_error "#{type} has invalid property '#{key}': multiple schemas found from #{ref}"
|
686
648
|
nil
|
687
649
|
end
|
688
650
|
end
|
689
651
|
|
690
652
|
if schema
|
691
653
|
# ref_cols must exist in schema
|
692
|
-
|
654
|
+
log_error "#{type} has invalid property '#{key}': no columnReference found" unless Array(ref_cols).length > 0
|
693
655
|
Array(ref_cols).each do |k|
|
694
|
-
|
656
|
+
log_error "#{type} has invalid property '#{key}': column reference not found #{k}" unless schema.columns.any? {|c| c[:name] == k}
|
695
657
|
end
|
696
658
|
end
|
697
659
|
else
|
698
|
-
|
660
|
+
log_error "#{type} has invalid property '#{key}': reference must be an object #{reference.inspect}"
|
699
661
|
end
|
700
662
|
end
|
701
663
|
when :format
|
@@ -707,7 +669,7 @@ module RDF::Tabular
|
|
707
669
|
nonNegativeInteger positiveInteger nonPositiveInteger negativeInteger
|
708
670
|
unsignedLong unsignedInt unsignedShort unsignedByte
|
709
671
|
).include?(self.base)
|
710
|
-
|
672
|
+
log_warn "#{type} has invalid property '#{key}': Object form only allowed on string or binary datatypes"
|
711
673
|
object.delete(:format) # act as if not set
|
712
674
|
end
|
713
675
|
|
@@ -715,14 +677,14 @@ module RDF::Tabular
|
|
715
677
|
begin
|
716
678
|
parse_uax35_number(value["pattern"], nil, value.fetch('groupChar', ','), value.fetch('decimalChar', '.'))
|
717
679
|
rescue ArgumentError => e
|
718
|
-
|
719
|
-
object[:format].delete("pattern") # act as if not set
|
680
|
+
log_warn "#{type} has invalid property '#{key}' pattern: #{e.message}"
|
681
|
+
object[:format].delete("pattern") if object[:format] # act as if not set
|
720
682
|
end
|
721
683
|
else
|
722
684
|
case self.base
|
723
685
|
when 'boolean'
|
724
686
|
unless value.split("|").length == 2
|
725
|
-
|
687
|
+
log_warn "#{type} has invalid property '#{key}': annotation provides the true and false values expected, separated by '|'"
|
726
688
|
object.delete(:format) # act as if not set
|
727
689
|
end
|
728
690
|
when :decimal, :integer, :long, :int, :short, :byte,
|
@@ -733,7 +695,7 @@ module RDF::Tabular
|
|
733
695
|
begin
|
734
696
|
parse_uax35_number(value, nil)
|
735
697
|
rescue ArgumentError => e
|
736
|
-
|
698
|
+
log_warn "#{type} has invalid property '#{key}': #{e.message}"
|
737
699
|
object.delete(:format) # act as if not set
|
738
700
|
end
|
739
701
|
when 'date', 'dateTime', 'datetime', 'dateTimeStamp', 'time'
|
@@ -741,7 +703,7 @@ module RDF::Tabular
|
|
741
703
|
begin
|
742
704
|
parse_uax35_date(value, nil)
|
743
705
|
rescue ArgumentError => e
|
744
|
-
|
706
|
+
log_warn "#{type} has invalid property '#{key}': #{e.message}"
|
745
707
|
object.delete(:format) # act as if not set
|
746
708
|
end
|
747
709
|
else
|
@@ -749,7 +711,7 @@ module RDF::Tabular
|
|
749
711
|
begin
|
750
712
|
Regexp.compile(value)
|
751
713
|
rescue
|
752
|
-
|
714
|
+
log_warn "#{type} has invalid property '#{key}': #{$!.message}"
|
753
715
|
object.delete(:format) # act as if not set
|
754
716
|
end
|
755
717
|
end
|
@@ -760,20 +722,20 @@ module RDF::Tabular
|
|
760
722
|
if object[:length]
|
761
723
|
case key
|
762
724
|
when :minLength
|
763
|
-
|
725
|
+
log_error "#{type} has invalid property minLength': both length and minLength requires length be greater than or equal to minLength" if object[:length] < value
|
764
726
|
when :maxLength
|
765
|
-
|
727
|
+
log_error "#{type} has invalid property maxLength': both length and maxLength requires length be less than or equal to maxLength" if object[:length] > value
|
766
728
|
end
|
767
729
|
end
|
768
730
|
|
769
731
|
# Applications must raise an error if minLength and maxLength are both specified and minLength is greater than maxLength.
|
770
732
|
if key == :maxLength && object[:minLength]
|
771
|
-
|
733
|
+
log_error "#{type} has invalid property #{key}': both minLength and maxLength requires minLength be less than or equal to maxLength" if object[:minLength] > value
|
772
734
|
end
|
773
735
|
|
774
736
|
# Applications must raise an error if length, maxLength, or minLength are specified and the base datatype is not string or one of its subtypes, or a binary type.
|
775
737
|
unless %w(string normalizedString token language Name NMTOKEN hexBinary base64Binary binary).include?(self.base)
|
776
|
-
|
738
|
+
log_error "#{type} has invalid property '#{key}': only allowed on string or binary datatypes"
|
777
739
|
end
|
778
740
|
when :minimum, :maximum, :minInclusive, :maxInclusive, :minExclusive, :maxExclusive
|
779
741
|
case self.base
|
@@ -781,46 +743,46 @@ module RDF::Tabular
|
|
781
743
|
'nonNegativeInteger', 'positiveInteger', 'unsignedLong', 'unsignedInt', 'unsignedShort', 'unsignedByte',
|
782
744
|
'nonPositiveInteger', 'negativeInteger', 'date', 'dateTime', 'datetime', 'dateTimeStamp', 'time',
|
783
745
|
'duration', 'dayTimeDuration', 'yearMonthDuration'
|
784
|
-
|
746
|
+
log_error "#{type} has invalid property '#{key}': #{value.to_ntriples} is not a valid #{self.base}" unless value.valid?
|
785
747
|
|
786
748
|
case key
|
787
749
|
when :minInclusive
|
788
750
|
# Applications MUST raise an error if both minInclusive and minExclusive are specified
|
789
|
-
|
751
|
+
log_error "#{type} cannot specify both minInclusive and minExclusive" if self.minExclusive
|
790
752
|
|
791
753
|
# Applications MUST raise an error if both minInclusive and maxInclusive are specified and maxInclusive is less than minInclusive
|
792
|
-
|
754
|
+
log_error "#{type} maxInclusive < minInclusive" if self.maxInclusive && self.maxInclusive < value
|
793
755
|
|
794
756
|
# Applications MUST raise an error if both minInclusive and maxExclusive are specified and maxExclusive is less than or equal to minInclusive
|
795
|
-
|
757
|
+
log_error "#{type} maxExclusive <= minInclusive" if self.maxExclusive && self.maxExclusive <= value
|
796
758
|
when :maxInclusive
|
797
759
|
# Applications MUST raise an error if both maxInclusive and maxExclusive are specified
|
798
|
-
|
760
|
+
log_error "#{type} cannot specify both maInclusive and maxExclusive" if self.maxExclusive
|
799
761
|
when :minExclusive
|
800
762
|
# Applications MUST raise an error if both minExclusive and maxExclusive are specified and maxExclusive is less than minExclusive
|
801
|
-
|
763
|
+
log_error "#{type} minExclusive < maxExclusive" if self.maxExclusive && self.maxExclusive < value
|
802
764
|
|
803
765
|
# Applications MUST raise an error if both minExclusive and maxInclusive are specified and maxInclusive is less than or equal to minExclusive
|
804
|
-
|
766
|
+
log_error "#{type} maxInclusive < minExclusive" if self.maxInclusive && self.maxInclusive <= value
|
805
767
|
end
|
806
768
|
else
|
807
|
-
|
769
|
+
log_error "#{type} has invalid property '#{key}': only allowed on numeric, date/time or duration datatypes"
|
808
770
|
end
|
809
771
|
when :notes
|
810
772
|
unless value.is_a?(Hash) || value.is_a?(Array)
|
811
|
-
|
773
|
+
log_error "#{type} has invalid property '#{key}': #{value}, Object or Array"
|
812
774
|
end
|
813
775
|
begin
|
814
776
|
normalize_jsonld(key, value)
|
815
777
|
rescue Error => e
|
816
|
-
|
778
|
+
log_error "#{type} has invalid content '#{key}': #{e.message}"
|
817
779
|
end
|
818
780
|
when :primaryKey, :rowTitles
|
819
781
|
# A column reference property that holds either a single reference to a column description object or an array of references.
|
820
782
|
"#{type} has invalid property '#{key}': no column references found" unless Array(value).length > 0
|
821
783
|
Array(value).each do |k|
|
822
784
|
unless self.columns.any? {|c| c[:name] == k}
|
823
|
-
|
785
|
+
log_warn "#{type} has invalid property '#{key}': column reference not found #{k}"
|
824
786
|
object.delete(key)
|
825
787
|
end
|
826
788
|
end
|
@@ -829,34 +791,33 @@ module RDF::Tabular
|
|
829
791
|
when :@id
|
830
792
|
# Must not be a BNode
|
831
793
|
if value.to_s.start_with?("_:")
|
832
|
-
|
794
|
+
log_error "#{type} has invalid property '#{key}': #{value.inspect}, must not start with '_:'"
|
833
795
|
end
|
834
796
|
|
835
797
|
# Datatype @id MUST NOT be the URL of a built-in type
|
836
798
|
if self.is_a?(Datatype) && DATATYPES.values.include?(value)
|
837
|
-
|
799
|
+
log_error "#{type} has invalid property '#{key}': #{value.inspect}, must not be the URL of a built-in datatype"
|
838
800
|
end
|
839
801
|
when :@type
|
840
802
|
# Must not be a BNode
|
841
803
|
if value.to_s.start_with?("_:")
|
842
|
-
|
804
|
+
log_error "#{type} has invalid property '@type': #{value.inspect}, must not start with '_:'"
|
843
805
|
end
|
844
806
|
case type
|
845
807
|
when :Transformation
|
846
|
-
|
808
|
+
log_error "#{type} has invalid property '@type': #{value.inspect}, expected #{type}" unless value.to_sym == :Template
|
847
809
|
else
|
848
|
-
|
810
|
+
log_error "#{type} has invalid property '@type': #{value.inspect}, expected #{type}" unless value.to_sym == type
|
849
811
|
end
|
850
812
|
when ->(k) {key.to_s.include?(':')}
|
851
813
|
begin
|
852
814
|
normalize_jsonld(key, value)
|
853
815
|
rescue Error => e
|
854
|
-
|
816
|
+
log_error "#{type} has invalid content '#{key}': #{e.message}"
|
855
817
|
end
|
856
818
|
end
|
857
819
|
end
|
858
820
|
|
859
|
-
raise Error, errors.join("\n") unless errors.empty?
|
860
821
|
self
|
861
822
|
end
|
862
823
|
|
@@ -885,10 +846,37 @@ module RDF::Tabular
|
|
885
846
|
# @param [:read] input
|
886
847
|
# @yield [Row]
|
887
848
|
def each_row(input)
|
888
|
-
csv =
|
889
|
-
|
890
|
-
|
891
|
-
|
849
|
+
csv, number, skipped = nil, 0, 0
|
850
|
+
path = input.base_uri.path rescue ""
|
851
|
+
if path.end_with?('.html') || input.respond_to?(:content_type) && input.content_type == 'text/html'
|
852
|
+
# Input is HTML; use fragment identfier to find table.
|
853
|
+
fragment = RDF::URI(self.url).fragment rescue nil
|
854
|
+
tab = begin
|
855
|
+
# Extract with nokogiri
|
856
|
+
require 'nokogiri' unless defined?(:Nokogiri)
|
857
|
+
doc = Nokogiri::HTML.parse(input)
|
858
|
+
doc.search("##{fragment}").first if fragment
|
859
|
+
rescue LoadError
|
860
|
+
# Extract with REXML
|
861
|
+
# FIXME
|
862
|
+
end
|
863
|
+
|
864
|
+
raise Error, "Expected to find HTML table identified by fragment identifer ##{fragment}" unless tab
|
865
|
+
|
866
|
+
# Use rows with <td> to create column data
|
867
|
+
csv = []
|
868
|
+
number = 0
|
869
|
+
tab.xpath('.//tr').map do |row|
|
870
|
+
number += 1 if row.xpath('th')
|
871
|
+
data = row.xpath('td').map(&:content)
|
872
|
+
csv << data unless data.empty?
|
873
|
+
end
|
874
|
+
else
|
875
|
+
csv = ::CSV.new(input, csv_options)
|
876
|
+
# Skip skipRows and headerRowCount
|
877
|
+
skipped = (dialect.skipRows.to_i + dialect.headerRowCount)
|
878
|
+
(1..skipped).each {csv.shift}
|
879
|
+
end
|
892
880
|
csv.each do |data|
|
893
881
|
# Check for embedded comments
|
894
882
|
if dialect.commentPrefix && data.first.to_s.start_with?(dialect.commentPrefix)
|
@@ -934,17 +922,17 @@ module RDF::Tabular
|
|
934
922
|
if value['@value']
|
935
923
|
dt = RDF::URI(context.expand_iri(value['@type'], vocab: true)) if value['@type']
|
936
924
|
lit = RDF::Literal(value['@value'], language: value['@language'], datatype: dt)
|
937
|
-
block.call(RDF::Statement
|
925
|
+
block.call(RDF::Statement(subject, property, lit))
|
938
926
|
else
|
939
927
|
# value MUST be a node object, establish a new subject from `@id`
|
940
928
|
s2 = value.has_key?('@id') ? context.expand_iri(value['@id']) : RDF::Node.new
|
941
929
|
|
942
930
|
# Generate a triple
|
943
|
-
block.call(RDF::Statement
|
931
|
+
block.call(RDF::Statement(subject, property, s2))
|
944
932
|
|
945
933
|
# Generate types
|
946
934
|
Array(value['@type']).each do |t|
|
947
|
-
block.call(RDF::Statement
|
935
|
+
block.call(RDF::Statement(s2, RDF.type, context.expand_iri(t, vocab: true)))
|
948
936
|
end
|
949
937
|
|
950
938
|
# Generate triples for all other properties
|
@@ -956,7 +944,7 @@ module RDF::Tabular
|
|
956
944
|
else
|
957
945
|
# Value is a primitive JSON value
|
958
946
|
lit = RDF::Literal(value)
|
959
|
-
block.call(RDF::Statement
|
947
|
+
block.call(RDF::Statement(subject, property, RDF::Literal(value)))
|
960
948
|
end
|
961
949
|
else
|
962
950
|
case value
|
@@ -1011,7 +999,7 @@ module RDF::Tabular
|
|
1011
999
|
if @options[:validate]
|
1012
1000
|
raise Error, "TableGroups must have Table with matching url #{tables.map(&:url).inspect} vs #{other.url.inspect}"
|
1013
1001
|
else
|
1014
|
-
|
1002
|
+
log_warn "TableGroups must have Table with matching url #{tables.map(&:url).inspect} vs #{other.url.inspect}"
|
1015
1003
|
end
|
1016
1004
|
end
|
1017
1005
|
else
|
@@ -1020,7 +1008,7 @@ module RDF::Tabular
|
|
1020
1008
|
if @options[:validate]
|
1021
1009
|
raise Error, "Tables must have the same url: #{url.inspect} vs #{other.url.inspect}}"
|
1022
1010
|
else
|
1023
|
-
|
1011
|
+
log_warn "Tables must have the same url: #{url.inspect} vs #{other.url.inspect}}"
|
1024
1012
|
end
|
1025
1013
|
end
|
1026
1014
|
|
@@ -1033,7 +1021,7 @@ module RDF::Tabular
|
|
1033
1021
|
if @options[:validate]
|
1034
1022
|
raise Error, "Columns must have the same number of non-virtual columns: #{non_virtual_columns.map(&:name).inspect} vs #{object_columns.map(&:name).inspect}"
|
1035
1023
|
else
|
1036
|
-
|
1024
|
+
log_warn "Columns must have the same number of non-virtual columns: #{non_virtual_columns.map(&:name).inspect} vs #{object_columns.map(&:name).inspect}"
|
1037
1025
|
|
1038
1026
|
# If present, a virtual column MUST appear after all other non-virtual column definitions
|
1039
1027
|
raise Error, "Virtual columns may not appear before non-virtual columns" unless Array(tableSchema.columns)[0..non_virtual_columns.length-1] == non_virtual_columns
|
@@ -1048,13 +1036,13 @@ module RDF::Tabular
|
|
1048
1036
|
end
|
1049
1037
|
index = 0
|
1050
1038
|
object_columns.all? do |cb|
|
1051
|
-
ca = non_virtual_columns[index] || Column.new({})
|
1039
|
+
ca = non_virtual_columns[index] || Column.new({}, @options)
|
1052
1040
|
ta = ca.titles || {}
|
1053
1041
|
tb = cb.titles || {}
|
1054
1042
|
if !ca.object.has_key?(:name) && !cb.object.has_key?(:name) && ta.empty? && tb.empty?
|
1055
1043
|
true
|
1056
1044
|
elsif ca.object.has_key?(:name) && cb.object.has_key?(:name)
|
1057
|
-
raise Error, "Columns don't match
|
1045
|
+
raise Error, "Columns don't match on name: #{ca.name}, #{cb.name}" unless ca.name == cb.name
|
1058
1046
|
elsif @options[:validate] || !ta.empty? && !tb.empty?
|
1059
1047
|
# If validating, column compatibility requires strict match between titles
|
1060
1048
|
titles_match = case
|
@@ -1078,10 +1066,10 @@ module RDF::Tabular
|
|
1078
1066
|
true
|
1079
1067
|
elsif !@options[:validate]
|
1080
1068
|
# If not validating, columns don't match, but processing continues
|
1081
|
-
|
1069
|
+
log_warn "Columns don't match on titles: #{ca.titles.inspect} vs #{cb.titles.inspect}"
|
1082
1070
|
true
|
1083
1071
|
else
|
1084
|
-
raise Error, "Columns don't match
|
1072
|
+
raise Error, "Columns don't match on titles: #{ca.titles.inspect} vs #{cb.titles.inspect}"
|
1085
1073
|
end
|
1086
1074
|
end
|
1087
1075
|
index += 1
|
@@ -1175,13 +1163,13 @@ module RDF::Tabular
|
|
1175
1163
|
when Hash
|
1176
1164
|
if value['@value']
|
1177
1165
|
if !(value.keys.sort - %w(@value @type @language)).empty?
|
1178
|
-
|
1166
|
+
log_error "Value object may not contain keys other than @value, @type, or @language: #{value.to_json}"
|
1179
1167
|
elsif (value.keys.sort & %w(@language @type)) == %w(@language @type)
|
1180
|
-
|
1168
|
+
log_error "Value object may not contain both @type and @language: #{value.to_json}"
|
1181
1169
|
elsif value['@language'] && !BCP47::Language.identify(value['@language'].to_s)
|
1182
|
-
|
1170
|
+
log_error "Value object with @language must use valid language: #{value.to_json}"
|
1183
1171
|
elsif value['@type'] && (value['@type'].start_with?('_:') || !context.expand_iri(value['@type'], vocab: true).absolute?)
|
1184
|
-
|
1172
|
+
log_error "Value object with @type must defined type: #{value.to_json}"
|
1185
1173
|
end
|
1186
1174
|
value
|
1187
1175
|
else
|
@@ -1190,16 +1178,16 @@ module RDF::Tabular
|
|
1190
1178
|
case k
|
1191
1179
|
when "@id"
|
1192
1180
|
nv[k] = context.expand_iri(v, documentRelative: true).to_s
|
1193
|
-
|
1181
|
+
log_error "Invalid use of explicit BNode on @id" if nv[k].start_with?('_:')
|
1194
1182
|
when "@type"
|
1195
1183
|
Array(v).each do |vv|
|
1196
1184
|
# Validate that all type values transform to absolute IRIs
|
1197
1185
|
resource = context.expand_iri(vv, vocab: true)
|
1198
|
-
|
1186
|
+
log_error "Invalid type #{vv} in JSON-LD context" unless resource.is_a?(RDF::URI) && resource.absolute?
|
1199
1187
|
end
|
1200
1188
|
nv[k] = v
|
1201
1189
|
when /^(@|_:)/
|
1202
|
-
|
1190
|
+
log_error "Invalid use of #{k} in JSON-LD content"
|
1203
1191
|
else
|
1204
1192
|
nv[k] = normalize_jsonld(k, v)
|
1205
1193
|
end
|
@@ -1212,15 +1200,9 @@ module RDF::Tabular
|
|
1212
1200
|
end
|
1213
1201
|
protected
|
1214
1202
|
|
1215
|
-
# Add a warning on this object
|
1216
|
-
def warn(string)
|
1217
|
-
debug("warn: #{string}")
|
1218
|
-
(@warnings ||= []) << string
|
1219
|
-
end
|
1220
|
-
|
1221
1203
|
def set_property(key, type, value, invalid)
|
1222
1204
|
if invalid
|
1223
|
-
|
1205
|
+
log_warn "#{type} has invalid property '#{key}' (#{value.inspect}): expected #{invalid}"
|
1224
1206
|
case type
|
1225
1207
|
when :link, :uri_template
|
1226
1208
|
object[key] = ""
|
@@ -1264,12 +1246,12 @@ module RDF::Tabular
|
|
1264
1246
|
end
|
1265
1247
|
end
|
1266
1248
|
else
|
1267
|
-
|
1249
|
+
log_warn "#{type} has invalid property '#{key}': expected array of #{klass}"
|
1268
1250
|
[]
|
1269
1251
|
end
|
1270
1252
|
|
1271
1253
|
unless object[key].all? {|v| v.is_a?(klass)}
|
1272
|
-
|
1254
|
+
log_warn "#{type} has invalid property '#{key}': expected array of #{klass}"
|
1273
1255
|
# Remove elements that aren't of the right types
|
1274
1256
|
object[key] = object[key].select! {|v| v.is_a?(klass)}
|
1275
1257
|
end
|
@@ -1298,14 +1280,13 @@ module RDF::Tabular
|
|
1298
1280
|
end
|
1299
1281
|
|
1300
1282
|
class DebugContext
|
1301
|
-
include
|
1302
|
-
|
1303
|
-
|
1304
|
-
|
1305
|
-
end
|
1283
|
+
include RDF::Util::Logger
|
1284
|
+
end
|
1285
|
+
def self.log_debug(*args, &block)
|
1286
|
+
DebugContext.new.log_debug(*args, &block)
|
1306
1287
|
end
|
1307
|
-
def self.
|
1308
|
-
DebugContext.new(*args
|
1288
|
+
def self.log_warn(*args)
|
1289
|
+
DebugContext.new.log_warn(*args)
|
1309
1290
|
end
|
1310
1291
|
end
|
1311
1292
|
|
@@ -1453,7 +1434,7 @@ module RDF::Tabular
|
|
1453
1434
|
content['@context'] = object.delete(:@context) if object[:@context]
|
1454
1435
|
ctx = @context
|
1455
1436
|
remove_instance_variable(:@context) if instance_variables.include?(:@context)
|
1456
|
-
tg = TableGroup.new(content, context: ctx, filenames: @filenames, base: base)
|
1437
|
+
tg = TableGroup.new(content, @options.merge(context: ctx, filenames: @filenames, base: base))
|
1457
1438
|
@parent = tg # Link from parent
|
1458
1439
|
tg
|
1459
1440
|
end
|
@@ -1464,8 +1445,7 @@ module RDF::Tabular
|
|
1464
1445
|
"@id" => (id.to_s if id),
|
1465
1446
|
"@type" => "AnnotatedTable",
|
1466
1447
|
"url" => self.url.to_s,
|
1467
|
-
"
|
1468
|
-
"rows" => []
|
1448
|
+
"tableSchema" => (tableSchema.to_atd if tableSchema),
|
1469
1449
|
}) do |memo, (k, v)|
|
1470
1450
|
memo[k.to_s] ||= v
|
1471
1451
|
memo
|
@@ -1519,12 +1499,12 @@ module RDF::Tabular
|
|
1519
1499
|
end
|
1520
1500
|
end
|
1521
1501
|
else
|
1522
|
-
|
1502
|
+
log_warn "#{type} has invalid property 'columns': expected array of Column"
|
1523
1503
|
[]
|
1524
1504
|
end
|
1525
1505
|
|
1526
1506
|
unless object[:columns].all? {|v| v.is_a?(Column)}
|
1527
|
-
|
1507
|
+
log_warn "#{type} has invalid property 'columns': expected array of Column"
|
1528
1508
|
# Remove elements that aren't of the right types
|
1529
1509
|
object[:columns] = object[:columns].select! {|v| v.is_a?(Column)}
|
1530
1510
|
end
|
@@ -1534,12 +1514,12 @@ module RDF::Tabular
|
|
1534
1514
|
object[:foreignKeys] = case value
|
1535
1515
|
when Array then value
|
1536
1516
|
else
|
1537
|
-
|
1517
|
+
log_warn "#{type} has invalid property 'foreignKeys': expected array of ForeignKey"
|
1538
1518
|
[]
|
1539
1519
|
end
|
1540
1520
|
|
1541
1521
|
unless object[:foreignKeys].all? {|v| v.is_a?(Hash)}
|
1542
|
-
|
1522
|
+
log_warn "#{type} has invalid property 'foreignKeys': expected array of ForeignKey"
|
1543
1523
|
# Remove elements that aren't of the right types
|
1544
1524
|
object[:foreignKeys] = object[:foreignKeys].select! {|v| v.is_a?(Hash)}
|
1545
1525
|
end
|
@@ -1562,6 +1542,18 @@ module RDF::Tabular
|
|
1562
1542
|
end
|
1563
1543
|
end
|
1564
1544
|
end
|
1545
|
+
|
1546
|
+
# Return Annotated Table representation
|
1547
|
+
def to_atd
|
1548
|
+
object.inject({
|
1549
|
+
"@id" => (id.to_s if id),
|
1550
|
+
"@type" => "Schema",
|
1551
|
+
"columns" => Array(columns).map(&:to_atd),
|
1552
|
+
}) do |memo, (k, v)|
|
1553
|
+
memo[k.to_s] ||= v
|
1554
|
+
memo
|
1555
|
+
end.delete_if {|k,v| v.nil? || v.is_a?(Metadata)}
|
1556
|
+
end
|
1565
1557
|
end
|
1566
1558
|
|
1567
1559
|
class Column < Metadata
|
@@ -1650,7 +1642,6 @@ module RDF::Tabular
|
|
1650
1642
|
"table" => (table.id.to_s if table.id),
|
1651
1643
|
"number" => self.number,
|
1652
1644
|
"sourceNumber" => self.sourceNumber,
|
1653
|
-
"cells" => [],
|
1654
1645
|
"virtual" => self.virtual,
|
1655
1646
|
"name" => self.name,
|
1656
1647
|
"titles" => self.titles
|
@@ -1701,7 +1692,7 @@ module RDF::Tabular
|
|
1701
1692
|
class Dialect < Metadata
|
1702
1693
|
# Defaults for dialects
|
1703
1694
|
DEFAULTS = {
|
1704
|
-
commentPrefix:
|
1695
|
+
commentPrefix: false,
|
1705
1696
|
delimiter: ",".freeze,
|
1706
1697
|
doubleQuote: true,
|
1707
1698
|
encoding: "utf-8".freeze,
|
@@ -1814,38 +1805,75 @@ module RDF::Tabular
|
|
1814
1805
|
lang ||= 'und'
|
1815
1806
|
|
1816
1807
|
# Set encoding on input
|
1817
|
-
|
1818
|
-
(
|
1819
|
-
|
1820
|
-
|
1821
|
-
|
1822
|
-
|
1823
|
-
|
1824
|
-
|
1825
|
-
|
1826
|
-
|
1827
|
-
|
1828
|
-
|
1829
|
-
|
1830
|
-
row_data = Array(csv.shift)
|
1831
|
-
Array(row_data).each_with_index do |value, index|
|
1832
|
-
# Skip columns
|
1833
|
-
skipCols = skipColumns.to_i
|
1834
|
-
next if index < skipCols
|
1808
|
+
path = input.base_uri.path rescue ""
|
1809
|
+
if path.end_with?('.html') || input.respond_to?(:content_type) && input.content_type == 'text/html'
|
1810
|
+
# Input is HTML; use fragment identifier to find table.
|
1811
|
+
fragment = RDF::URI(table["url"]).fragment rescue nil
|
1812
|
+
tab = begin
|
1813
|
+
# Extract with nokogiri
|
1814
|
+
require 'nokogiri' unless defined?(:Nokogiri)
|
1815
|
+
doc = Nokogiri::HTML.parse(input)
|
1816
|
+
doc.search("##{fragment}").first if fragment
|
1817
|
+
rescue LoadError
|
1818
|
+
# Extract with REXML
|
1819
|
+
# FIXME
|
1820
|
+
end
|
1835
1821
|
|
1822
|
+
raise Error, "Expected to find HTML table identified by fragment identifer ##{fragment}" unless tab
|
1823
|
+
|
1824
|
+
# Use rows with <th> to create column titles
|
1825
|
+
tab.xpath('.//tr').each do |row|
|
1826
|
+
row.xpath('th').map(&:content).each_with_index do |value, index|
|
1827
|
+
# Skip columns
|
1828
|
+
skipCols = skipColumns.to_i
|
1829
|
+
next if index < skipCols
|
1830
|
+
|
1831
|
+
# Trim value
|
1832
|
+
value.lstrip! if %w(true start).include?(trim.to_s)
|
1833
|
+
value.rstrip! if %w(true end).include?(trim.to_s)
|
1834
|
+
|
1835
|
+
# Initialize titles
|
1836
|
+
columns = table["tableSchema"]["columns"] ||= []
|
1837
|
+
column = columns[index - skipCols] ||= {
|
1838
|
+
"titles" => {lang => []},
|
1839
|
+
}
|
1840
|
+
column["titles"][lang] << value
|
1841
|
+
end
|
1842
|
+
end
|
1843
|
+
else
|
1844
|
+
csv = ::CSV.new(input, csv_options)
|
1845
|
+
(1..skipRows.to_i).each do
|
1846
|
+
value = csv.shift.join(delimiter) # Skip initial lines, these form comment annotations
|
1836
1847
|
# Trim value
|
1837
1848
|
value.lstrip! if %w(true start).include?(trim.to_s)
|
1838
1849
|
value.rstrip! if %w(true end).include?(trim.to_s)
|
1839
1850
|
|
1840
|
-
|
1841
|
-
|
1842
|
-
|
1843
|
-
|
1844
|
-
|
1845
|
-
|
1851
|
+
value = value[1..-1].strip if commentPrefix && value.start_with?(commentPrefix)
|
1852
|
+
(metadata["rdfs:comment"] ||= []) << value unless value.empty?
|
1853
|
+
end
|
1854
|
+
log_debug("embedded_metadata") {"notes: #{table["notes"].inspect}"}
|
1855
|
+
|
1856
|
+
(1..headerRowCount).each do
|
1857
|
+
row_data = Array(csv.shift)
|
1858
|
+
Array(row_data).each_with_index do |value, index|
|
1859
|
+
# Skip columns
|
1860
|
+
skipCols = skipColumns.to_i
|
1861
|
+
next if index < skipCols
|
1862
|
+
|
1863
|
+
# Trim value
|
1864
|
+
value.lstrip! if %w(true start).include?(trim.to_s)
|
1865
|
+
value.rstrip! if %w(true end).include?(trim.to_s)
|
1866
|
+
|
1867
|
+
# Initialize titles
|
1868
|
+
columns = table["tableSchema"]["columns"] ||= []
|
1869
|
+
column = columns[index - skipCols] ||= {
|
1870
|
+
"titles" => {lang => []},
|
1871
|
+
}
|
1872
|
+
column["titles"][lang] << value
|
1873
|
+
end
|
1846
1874
|
end
|
1847
1875
|
end
|
1848
|
-
|
1876
|
+
log_debug("embedded_metadata") {"table: #{table.inspect}"}
|
1849
1877
|
input.rewind if input.respond_to?(:rewind)
|
1850
1878
|
|
1851
1879
|
Table.new(table, options.merge(reason: "load embedded metadata: #{table['@id']}"))
|
@@ -2030,13 +2058,13 @@ module RDF::Tabular
|
|
2030
2058
|
|
2031
2059
|
# create column if necessary
|
2032
2060
|
columns[index - skipColumns] ||=
|
2033
|
-
Column.new({}, table: metadata, parent: metadata.tableSchema, number: index + 1 - skipColumns)
|
2061
|
+
Column.new({}, options.merge(table: metadata, parent: metadata.tableSchema, number: index + 1 - skipColumns))
|
2034
2062
|
|
2035
2063
|
column = columns[index - skipColumns]
|
2036
2064
|
|
2037
2065
|
@values << cell = Cell.new(metadata, column, self, value)
|
2038
2066
|
|
2039
|
-
datatype = column.datatype || Datatype.new({base: "string"}, parent: column)
|
2067
|
+
datatype = column.datatype || Datatype.new({base: "string"}, options.merge(parent: column))
|
2040
2068
|
value = value.gsub(/\r\n\t/, ' ') unless %w(string json xml html anyAtomicType).include?(datatype.base)
|
2041
2069
|
value = value.strip.gsub(/\s+/, ' ') unless %w(string json xml html anyAtomicType normalizedString).include?(datatype.base)
|
2042
2070
|
# if the resulting string is an empty string, apply the remaining steps to the string given by the default property
|
@@ -2092,7 +2120,11 @@ module RDF::Tabular
|
|
2092
2120
|
|
2093
2121
|
# Identifier for this row, as an RFC7111 fragment
|
2094
2122
|
# @return [RDF::URI]
|
2095
|
-
def id;
|
2123
|
+
def id;
|
2124
|
+
u = table.url.dup
|
2125
|
+
u.fragment = "row=#{self.sourceNumber}"
|
2126
|
+
u
|
2127
|
+
end
|
2096
2128
|
|
2097
2129
|
# Return Annotated Row representation
|
2098
2130
|
def to_atd
|