rdf-tabular 0.3.0 → 0.4.0.beta2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +3 -2
- data/VERSION +1 -1
- data/lib/rdf/tabular.rb +0 -1
- data/lib/rdf/tabular/format.rb +16 -0
- data/lib/rdf/tabular/metadata.rb +223 -191
- data/lib/rdf/tabular/reader.rb +84 -133
- data/lib/rdf/tabular/uax35.rb +0 -2
- data/spec/format_spec.rb +34 -0
- data/spec/matchers.rb +3 -78
- data/spec/metadata_spec.rb +150 -80
- data/spec/reader_spec.rb +27 -24
- data/spec/spec_helper.rb +4 -3
- data/spec/suite_helper.rb +1 -1
- data/spec/suite_spec.rb +8 -9
- metadata +109 -60
- data/lib/rdf/tabular/utils.rb +0 -33
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: b34ec5c872bbf6e8d8f13559b255283cd118cd46
|
4
|
+
data.tar.gz: 08ace967385cb72fdc48e48ad434f7e0bd35753d
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 55a2305ce14c365631a1f7ad178e4b2c603ef2279c74537cb36f33a47ef81a19b250b1cad64719be5c1921536e06ee2ba4bd2fa2745dff81bfe652ed31ed823d
|
7
|
+
data.tar.gz: 9a3b83c57938b94ebf1ab86052a0bccd144c9380394150748a6e580bf480e1bf39f835a9b2ff2633b3d6ca09823b782c945f3a40fbeec653d6c0ae61218805af
|
data/README.md
CHANGED
@@ -13,6 +13,7 @@ RDF::Tabular parses CSV or other Tabular Data into [RDF][] and JSON using the [W
|
|
13
13
|
|
14
14
|
* Parses [number patterns](http://www.unicode.org/reports/tr35/tr35-39/tr35-numbers.html#Number_Patterns) from [UAX35][]
|
15
15
|
* Parses [date formats](http://www.unicode.org/reports/tr35/tr35-39/tr35-dates.html#Contents) from [UAX35][]
|
16
|
+
* Returns detailed errors and warnings using optional `Logger`.
|
16
17
|
|
17
18
|
## Installation
|
18
19
|
Install with `gem install rdf-tabular`
|
@@ -247,8 +248,8 @@ Full documentation available on [RubyDoc](http://rubydoc.info/gems/rdf-tabular/f
|
|
247
248
|
* {RDF::Tabular::Reader}
|
248
249
|
|
249
250
|
## Dependencies
|
250
|
-
* [Ruby](http://ruby-lang.org/) (>= 2.0
|
251
|
-
* [RDF.rb](http://rubygems.org/gems/rdf) (>=
|
251
|
+
* [Ruby](http://ruby-lang.org/) (>= 2.0)
|
252
|
+
* [RDF.rb](http://rubygems.org/gems/rdf) (>= 2.0)
|
252
253
|
* [JSON](https://rubygems.org/gems/json) (>= 1.5)
|
253
254
|
|
254
255
|
## Installation
|
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.
|
1
|
+
0.4.0.beta2
|
data/lib/rdf/tabular.rb
CHANGED
@@ -11,7 +11,6 @@ module RDF
|
|
11
11
|
# @author [Gregg Kellogg](http://greggkellogg.net/)
|
12
12
|
module Tabular
|
13
13
|
require 'rdf/tabular/format'
|
14
|
-
require 'rdf/tabular/utils'
|
15
14
|
autoload :Column, 'rdf/tabular/metadata'
|
16
15
|
autoload :CSVW, 'rdf/tabular/csvw'
|
17
16
|
autoload :Dialect, 'rdf/tabular/metadata'
|
data/lib/rdf/tabular/format.rb
CHANGED
@@ -46,5 +46,21 @@ module RDF::Tabular
|
|
46
46
|
def self.detect(sample)
|
47
47
|
!!sample.match(/^(?:(?:\w )+,(?:\w ))$/)
|
48
48
|
end
|
49
|
+
|
50
|
+
##
|
51
|
+
# Hash of CLI commands appropriate for this format
|
52
|
+
# @return [Hash{Symbol => Lambda(Array, Hash)}]
|
53
|
+
def self.cli_commands
|
54
|
+
{
|
55
|
+
:"tabular-json" => ->(argv, opts) do
|
56
|
+
raise ArgumentError, "Outputting Tabular JSON only allowed when input format is tabular." unless opts[:format] == :tabular
|
57
|
+
out = opts[:output] || $stdout
|
58
|
+
out.set_encoding(Encoding::UTF_8) if RUBY_PLATFORM == "java"
|
59
|
+
RDF::CLI.parse(argv, opts) do |reader|
|
60
|
+
out.puts reader.to_json
|
61
|
+
end
|
62
|
+
end
|
63
|
+
}
|
64
|
+
end
|
49
65
|
end
|
50
66
|
end
|
data/lib/rdf/tabular/metadata.rb
CHANGED
@@ -19,16 +19,12 @@ require 'yaml' # used by BCP47, which should have required it.
|
|
19
19
|
# @author [Gregg Kellogg](http://greggkellogg.net/)
|
20
20
|
module RDF::Tabular
|
21
21
|
class Metadata
|
22
|
-
include
|
22
|
+
include RDF::Util::Logger
|
23
23
|
|
24
24
|
# Hash representation
|
25
25
|
# @return [Hash<Symbol,Object>]
|
26
26
|
attr_accessor :object
|
27
27
|
|
28
|
-
# Warnings detected on initialization or when setting properties
|
29
|
-
# @return [Array<String>]
|
30
|
-
attr_accessor :warnings
|
31
|
-
|
32
28
|
# Inherited properties, valid for all types
|
33
29
|
INHERITED_PROPERTIES = {
|
34
30
|
aboutUrl: :uri_template,
|
@@ -179,7 +175,6 @@ module RDF::Tabular
|
|
179
175
|
# @return [Metadata]
|
180
176
|
def self.for_input(input, options = {})
|
181
177
|
base = options[:base]
|
182
|
-
warnings = options.fetch(:warnings, [])
|
183
178
|
|
184
179
|
# Use user metadata, if provided
|
185
180
|
metadata = case options[:metadata]
|
@@ -202,10 +197,7 @@ module RDF::Tabular
|
|
202
197
|
if md.describes_file?(base)
|
203
198
|
metadata = md
|
204
199
|
else
|
205
|
-
|
206
|
-
if options[:validate] && !options[:warnings]
|
207
|
-
$stderr.puts "Warnings: #{warnings.join("\n")}"
|
208
|
-
end
|
200
|
+
log_warn("Found metadata at #{link_loc}, which does not describe #{base}, ignoring", options)
|
209
201
|
end
|
210
202
|
end
|
211
203
|
end
|
@@ -214,12 +206,12 @@ module RDF::Tabular
|
|
214
206
|
# If we still don't have metadata, load the site-wide configuration file and use templates found there as locations
|
215
207
|
if !metadata && base
|
216
208
|
templates = site_wide_config(base)
|
217
|
-
|
209
|
+
log_debug("for_input", options) {"templates: #{templates.map(&:to_s).inspect}"}
|
218
210
|
locs = templates.map do |template|
|
219
211
|
t = Addressable::Template.new(template)
|
220
212
|
RDF::URI(base).join(t.expand(url: base).to_s)
|
221
213
|
end
|
222
|
-
|
214
|
+
log_debug("for_input", options) {"locs: #{locs.map(&:to_s).inspect}"}
|
223
215
|
|
224
216
|
locs.each do |loc|
|
225
217
|
metadata ||= begin
|
@@ -230,15 +222,12 @@ module RDF::Tabular
|
|
230
222
|
if md.describes_file?(base)
|
231
223
|
md
|
232
224
|
else
|
233
|
-
|
234
|
-
if options[:validate] && !options[:warnings]
|
235
|
-
$stderr.puts "Warnings: #{warnings.join("\n")}"
|
236
|
-
end
|
225
|
+
log_warn("Found metadata at #{loc}, which does not describe #{base}, ignoring", options)
|
237
226
|
nil
|
238
227
|
end
|
239
228
|
end
|
240
229
|
rescue IOError
|
241
|
-
|
230
|
+
log_debug("for_input", options) {"failed to load found metadata #{loc}: #{$!}"}
|
242
231
|
nil
|
243
232
|
end
|
244
233
|
end
|
@@ -331,7 +320,6 @@ module RDF::Tabular
|
|
331
320
|
# @return [Metadata]
|
332
321
|
def initialize(input, options = {})
|
333
322
|
@options = options.dup
|
334
|
-
@options[:depth] ||= 0
|
335
323
|
|
336
324
|
# Parent of this Metadata, if any
|
337
325
|
@parent = @options[:parent]
|
@@ -344,14 +332,14 @@ module RDF::Tabular
|
|
344
332
|
|
345
333
|
@context = case input['@context']
|
346
334
|
when Array
|
347
|
-
|
335
|
+
log_warn "Context missing required value 'http://www.w3.org/ns/csvw'" unless input['@context'].include?('http://www.w3.org/ns/csvw')
|
348
336
|
c = LOCAL_CONTEXT.dup
|
349
337
|
c.base = RDF::URI(opt_base)
|
350
338
|
obj = input['@context'].detect {|e| e.is_a?(Hash)} || {}
|
351
339
|
raise Error, "@context has object with properties other than @base and @language" unless (obj.keys.map(&:to_s) - %w(@base @language)).empty?
|
352
340
|
c.parse(obj)
|
353
341
|
when Hash
|
354
|
-
|
342
|
+
log_warn "Context missing required value 'http://www.w3.org/ns/csvw'"
|
355
343
|
c = LOCAL_CONTEXT.dup
|
356
344
|
c.base = RDF::URI(opt_base)
|
357
345
|
c.parse(input['@context'])
|
@@ -362,7 +350,7 @@ module RDF::Tabular
|
|
362
350
|
c
|
363
351
|
else
|
364
352
|
if self.is_a?(TableGroup) || self.is_a?(Table) && !@parent
|
365
|
-
|
353
|
+
log_warn "Context missing required value 'http://www.w3.org/ns/csvw'"
|
366
354
|
LOCAL_CONTEXT.dup
|
367
355
|
c = LOCAL_CONTEXT.dup
|
368
356
|
c.base = RDF::URI(opt_base)
|
@@ -375,7 +363,7 @@ module RDF::Tabular
|
|
375
363
|
@options[:base] = @context ? @context.base : RDF::URI(opt_base)
|
376
364
|
|
377
365
|
if @context && @context.default_language && !BCP47::Language.identify(@context.default_language.to_s)
|
378
|
-
|
366
|
+
log_warn "Context has invalid @language (#{@context.default_language.inspect}): expected valid BCP47 language tag"
|
379
367
|
@context.default_language = nil
|
380
368
|
end
|
381
369
|
|
@@ -385,7 +373,7 @@ module RDF::Tabular
|
|
385
373
|
|
386
374
|
@object = {}
|
387
375
|
|
388
|
-
|
376
|
+
log_depth do
|
389
377
|
# Input was parsed in .new
|
390
378
|
# Metadata is object with symbolic keys
|
391
379
|
input.each do |key, value|
|
@@ -401,7 +389,7 @@ module RDF::Tabular
|
|
401
389
|
object[:@id] = if value.is_a?(String)
|
402
390
|
value
|
403
391
|
else
|
404
|
-
|
392
|
+
log_warn "#{type} has invalid property '@id' (#{value.inspect}): expected a string"
|
405
393
|
"" # Default value
|
406
394
|
end
|
407
395
|
@id = @options[:base].join(object[:@id])
|
@@ -426,9 +414,9 @@ module RDF::Tabular
|
|
426
414
|
end
|
427
415
|
|
428
416
|
if reason
|
429
|
-
|
430
|
-
|
431
|
-
|
417
|
+
log_debug("md#initialize") {reason}
|
418
|
+
log_debug("md#initialize") {"filenames: #{filenames}"}
|
419
|
+
log_debug("md#initialize") {"#{inspect}, parent: #{!@parent.nil?}, context: #{!@context.nil?}"} unless is_a?(Dialect)
|
432
420
|
end
|
433
421
|
end
|
434
422
|
|
@@ -487,7 +475,7 @@ module RDF::Tabular
|
|
487
475
|
when Schema
|
488
476
|
value
|
489
477
|
else
|
490
|
-
|
478
|
+
log_warn "#{type} has invalid property 'tableSchema' (#{value.inspect}): expected a URL or object"
|
491
479
|
Schema.new({}, @options.merge(parent: self, context: nil))
|
492
480
|
end
|
493
481
|
end
|
@@ -534,7 +522,7 @@ module RDF::Tabular
|
|
534
522
|
when Dialect
|
535
523
|
value
|
536
524
|
else
|
537
|
-
|
525
|
+
log_warn "#{type} has invalid property 'dialect' (#{value.inspect}): expected a URL or object"
|
538
526
|
nil
|
539
527
|
end
|
540
528
|
end
|
@@ -544,15 +532,15 @@ module RDF::Tabular
|
|
544
532
|
# @raise [Error] if datatype is not valid
|
545
533
|
def datatype=(value)
|
546
534
|
val = case value
|
547
|
-
when Hash then Datatype.new(value, parent: self)
|
548
|
-
else Datatype.new({base: value}, parent: self)
|
535
|
+
when Hash then Datatype.new(value, @options.merge(parent: self))
|
536
|
+
else Datatype.new({base: value}, @options.merge(parent: self))
|
549
537
|
end
|
550
538
|
|
551
539
|
if val.valid? || value.is_a?(Hash)
|
552
540
|
# Set it if it was specified as an object, which may cause validation errors later
|
553
541
|
object[:datatype] = val
|
554
542
|
else
|
555
|
-
|
543
|
+
log_warn "#{type} has invalid property 'datatype': expected a built-in or an object"
|
556
544
|
end
|
557
545
|
end
|
558
546
|
|
@@ -567,40 +555,20 @@ module RDF::Tabular
|
|
567
555
|
##
|
568
556
|
# Do we have valid metadata?
|
569
557
|
def valid?
|
570
|
-
validate
|
571
|
-
|
572
|
-
rescue
|
573
|
-
false
|
574
|
-
end
|
575
|
-
|
576
|
-
##
|
577
|
-
# Validation errors
|
578
|
-
# @return [Array<String>]
|
579
|
-
def errors
|
580
|
-
validate! && []
|
581
|
-
rescue Error => e
|
582
|
-
e.message.split("\n")
|
558
|
+
validate # Possibly re-validate
|
559
|
+
!log_statistics[:error]
|
583
560
|
end
|
584
561
|
|
585
|
-
|
586
|
-
|
587
|
-
# @return [Array<String>]
|
588
|
-
def warnings
|
589
|
-
((@warnings || []) + object.
|
590
|
-
values.
|
591
|
-
flatten.
|
592
|
-
select {|v| v.is_a?(Metadata)}.
|
593
|
-
map(&:warnings).
|
594
|
-
flatten).compact.uniq
|
562
|
+
def validate!
|
563
|
+
raise Error, "Metadata error" unless valid?
|
595
564
|
end
|
596
565
|
|
597
566
|
##
|
598
567
|
# Validate metadata, raising an error containing all errors detected during validation
|
599
568
|
# @raise [Error] Raise error if metadata has any unexpected properties
|
600
569
|
# @return [self]
|
601
|
-
def validate
|
570
|
+
def validate
|
602
571
|
expected_props, required_props = @properties.keys, @required
|
603
|
-
errors = []
|
604
572
|
|
605
573
|
unless is_a?(Dialect) || is_a?(Transformation)
|
606
574
|
expected_props = expected_props + INHERITED_PROPERTIES.keys
|
@@ -609,10 +577,10 @@ module RDF::Tabular
|
|
609
577
|
# It has only expected properties (exclude metadata)
|
610
578
|
check_keys = object.keys - [:"@id", :"@context"]
|
611
579
|
check_keys = check_keys.reject {|k| k.to_s.include?(':')} unless is_a?(Dialect)
|
612
|
-
|
580
|
+
log_warn "#{type} has unexpected keys: #{(check_keys - expected_props).map(&:to_s)}" unless check_keys.all? {|k| expected_props.include?(k)}
|
613
581
|
|
614
582
|
# It has required properties
|
615
|
-
|
583
|
+
log_error "#{type} missing required keys: #{(required_props - check_keys).map(&:to_s)}" unless (required_props & check_keys) == required_props
|
616
584
|
|
617
585
|
self.normalize!
|
618
586
|
|
@@ -621,55 +589,49 @@ module RDF::Tabular
|
|
621
589
|
value = object[key]
|
622
590
|
case key
|
623
591
|
when :base
|
624
|
-
|
592
|
+
log_error "#{type} has invalid base: #{value.inspect}" unless DATATYPES.keys.map(&:to_s).include?(value)
|
625
593
|
when :columns
|
626
|
-
value.each do |
|
627
|
-
|
628
|
-
|
629
|
-
rescue Error => e
|
630
|
-
errors << e.message
|
631
|
-
end
|
594
|
+
value.each do |col|
|
595
|
+
col.validate
|
596
|
+
log_statistics.merge!(col.log_statistics)
|
632
597
|
end
|
633
598
|
column_names = value.map(&:name)
|
634
|
-
|
599
|
+
log_error "#{type} has invalid property '#{key}': must have unique names: #{column_names.inspect}" unless column_names.uniq == column_names
|
635
600
|
when :datatype, :dialect, :tables, :tableSchema, :transformations
|
636
601
|
Array(value).each do |t|
|
637
602
|
# Make sure value is of appropriate class
|
638
603
|
if t.is_a?({datatype: Datatype, dialect: Dialect, tables: Table, tableSchema: Schema, transformations: Transformation}[key])
|
639
|
-
|
640
|
-
|
641
|
-
rescue Error => e
|
642
|
-
errors << e.message
|
643
|
-
end
|
604
|
+
t.validate
|
605
|
+
log_statistics.merge!(t.log_statistics)
|
644
606
|
else
|
645
|
-
|
607
|
+
log_error "#{type} has invalid property '#{key}': unexpected value #{value.class.name}"
|
646
608
|
end
|
647
609
|
end
|
648
|
-
|
610
|
+
log_error "#{type} has invalid property 'tables': must not be empty" if key == :tables && Array(value).empty?
|
649
611
|
when :foreignKeys
|
650
612
|
# An array of foreign key definitions that define how the values from specified columns within this table link to rows within this table or other tables. A foreign key definition is a JSON object with the properties:
|
651
613
|
value.each do |fk|
|
652
614
|
columnReference, reference = fk['columnReference'], fk['reference']
|
653
|
-
|
654
|
-
|
615
|
+
log_error "#{type} has invalid property '#{key}': missing columnReference and reference" unless columnReference && reference
|
616
|
+
log_error "#{type} has invalid property '#{key}': has extra entries #{fk.keys.inspect}" unless fk.keys.length == 2
|
655
617
|
|
656
618
|
# Verify that columns exist in this schema
|
657
|
-
|
619
|
+
log_error "#{type} has invalid property '#{key}': no columnReference found" unless Array(columnReference).length > 0
|
658
620
|
Array(columnReference).each do |k|
|
659
|
-
|
621
|
+
log_error "#{type} has invalid property '#{key}': columnReference not found #{k}" unless self.columns.any? {|c| c[:name] == k}
|
660
622
|
end
|
661
623
|
|
662
624
|
if reference.is_a?(Hash)
|
663
|
-
|
625
|
+
log_error "#{type} has invalid property '#{key}': reference has extra entries #{reference.keys.inspect}" unless (reference.keys - %w(resource schemaReference columnReference)).empty?
|
664
626
|
ref_cols = reference['columnReference']
|
665
627
|
schema = if reference.has_key?('resource')
|
666
628
|
if reference.has_key?('schemaReference')
|
667
|
-
|
629
|
+
log_error "#{type} has invalid property '#{key}': reference has a schemaReference: #{reference.inspect}"
|
668
630
|
end
|
669
631
|
# resource is the URL of a Table in the TableGroup
|
670
632
|
ref = context.base.join(reference['resource']).to_s
|
671
|
-
table = root.is_a?(TableGroup) && root.tables.detect {|t| t.url == ref}
|
672
|
-
|
633
|
+
table = root.is_a?(TableGroup) && Array(root.tables).detect {|t| t.url == ref}
|
634
|
+
log_error "#{type} has invalid property '#{key}': table referenced by #{ref} not found" unless table
|
673
635
|
table.tableSchema if table
|
674
636
|
elsif reference.has_key?('schemaReference')
|
675
637
|
# resource is the @id of a Schema in the TableGroup
|
@@ -677,25 +639,25 @@ module RDF::Tabular
|
|
677
639
|
tables = root.is_a?(TableGroup) ? root.tables.select {|t| t.tableSchema[:@id] == ref} : []
|
678
640
|
case tables.length
|
679
641
|
when 0
|
680
|
-
|
642
|
+
log_error "#{type} has invalid property '#{key}': schema referenced by #{ref} not found"
|
681
643
|
nil
|
682
644
|
when 1
|
683
645
|
tables.first.tableSchema
|
684
646
|
else
|
685
|
-
|
647
|
+
log_error "#{type} has invalid property '#{key}': multiple schemas found from #{ref}"
|
686
648
|
nil
|
687
649
|
end
|
688
650
|
end
|
689
651
|
|
690
652
|
if schema
|
691
653
|
# ref_cols must exist in schema
|
692
|
-
|
654
|
+
log_error "#{type} has invalid property '#{key}': no columnReference found" unless Array(ref_cols).length > 0
|
693
655
|
Array(ref_cols).each do |k|
|
694
|
-
|
656
|
+
log_error "#{type} has invalid property '#{key}': column reference not found #{k}" unless schema.columns.any? {|c| c[:name] == k}
|
695
657
|
end
|
696
658
|
end
|
697
659
|
else
|
698
|
-
|
660
|
+
log_error "#{type} has invalid property '#{key}': reference must be an object #{reference.inspect}"
|
699
661
|
end
|
700
662
|
end
|
701
663
|
when :format
|
@@ -707,7 +669,7 @@ module RDF::Tabular
|
|
707
669
|
nonNegativeInteger positiveInteger nonPositiveInteger negativeInteger
|
708
670
|
unsignedLong unsignedInt unsignedShort unsignedByte
|
709
671
|
).include?(self.base)
|
710
|
-
|
672
|
+
log_warn "#{type} has invalid property '#{key}': Object form only allowed on string or binary datatypes"
|
711
673
|
object.delete(:format) # act as if not set
|
712
674
|
end
|
713
675
|
|
@@ -715,14 +677,14 @@ module RDF::Tabular
|
|
715
677
|
begin
|
716
678
|
parse_uax35_number(value["pattern"], nil, value.fetch('groupChar', ','), value.fetch('decimalChar', '.'))
|
717
679
|
rescue ArgumentError => e
|
718
|
-
|
719
|
-
object[:format].delete("pattern") # act as if not set
|
680
|
+
log_warn "#{type} has invalid property '#{key}' pattern: #{e.message}"
|
681
|
+
object[:format].delete("pattern") if object[:format] # act as if not set
|
720
682
|
end
|
721
683
|
else
|
722
684
|
case self.base
|
723
685
|
when 'boolean'
|
724
686
|
unless value.split("|").length == 2
|
725
|
-
|
687
|
+
log_warn "#{type} has invalid property '#{key}': annotation provides the true and false values expected, separated by '|'"
|
726
688
|
object.delete(:format) # act as if not set
|
727
689
|
end
|
728
690
|
when :decimal, :integer, :long, :int, :short, :byte,
|
@@ -733,7 +695,7 @@ module RDF::Tabular
|
|
733
695
|
begin
|
734
696
|
parse_uax35_number(value, nil)
|
735
697
|
rescue ArgumentError => e
|
736
|
-
|
698
|
+
log_warn "#{type} has invalid property '#{key}': #{e.message}"
|
737
699
|
object.delete(:format) # act as if not set
|
738
700
|
end
|
739
701
|
when 'date', 'dateTime', 'datetime', 'dateTimeStamp', 'time'
|
@@ -741,7 +703,7 @@ module RDF::Tabular
|
|
741
703
|
begin
|
742
704
|
parse_uax35_date(value, nil)
|
743
705
|
rescue ArgumentError => e
|
744
|
-
|
706
|
+
log_warn "#{type} has invalid property '#{key}': #{e.message}"
|
745
707
|
object.delete(:format) # act as if not set
|
746
708
|
end
|
747
709
|
else
|
@@ -749,7 +711,7 @@ module RDF::Tabular
|
|
749
711
|
begin
|
750
712
|
Regexp.compile(value)
|
751
713
|
rescue
|
752
|
-
|
714
|
+
log_warn "#{type} has invalid property '#{key}': #{$!.message}"
|
753
715
|
object.delete(:format) # act as if not set
|
754
716
|
end
|
755
717
|
end
|
@@ -760,20 +722,20 @@ module RDF::Tabular
|
|
760
722
|
if object[:length]
|
761
723
|
case key
|
762
724
|
when :minLength
|
763
|
-
|
725
|
+
log_error "#{type} has invalid property minLength': both length and minLength requires length be greater than or equal to minLength" if object[:length] < value
|
764
726
|
when :maxLength
|
765
|
-
|
727
|
+
log_error "#{type} has invalid property maxLength': both length and maxLength requires length be less than or equal to maxLength" if object[:length] > value
|
766
728
|
end
|
767
729
|
end
|
768
730
|
|
769
731
|
# Applications must raise an error if minLength and maxLength are both specified and minLength is greater than maxLength.
|
770
732
|
if key == :maxLength && object[:minLength]
|
771
|
-
|
733
|
+
log_error "#{type} has invalid property #{key}': both minLength and maxLength requires minLength be less than or equal to maxLength" if object[:minLength] > value
|
772
734
|
end
|
773
735
|
|
774
736
|
# Applications must raise an error if length, maxLength, or minLength are specified and the base datatype is not string or one of its subtypes, or a binary type.
|
775
737
|
unless %w(string normalizedString token language Name NMTOKEN hexBinary base64Binary binary).include?(self.base)
|
776
|
-
|
738
|
+
log_error "#{type} has invalid property '#{key}': only allowed on string or binary datatypes"
|
777
739
|
end
|
778
740
|
when :minimum, :maximum, :minInclusive, :maxInclusive, :minExclusive, :maxExclusive
|
779
741
|
case self.base
|
@@ -781,46 +743,46 @@ module RDF::Tabular
|
|
781
743
|
'nonNegativeInteger', 'positiveInteger', 'unsignedLong', 'unsignedInt', 'unsignedShort', 'unsignedByte',
|
782
744
|
'nonPositiveInteger', 'negativeInteger', 'date', 'dateTime', 'datetime', 'dateTimeStamp', 'time',
|
783
745
|
'duration', 'dayTimeDuration', 'yearMonthDuration'
|
784
|
-
|
746
|
+
log_error "#{type} has invalid property '#{key}': #{value.to_ntriples} is not a valid #{self.base}" unless value.valid?
|
785
747
|
|
786
748
|
case key
|
787
749
|
when :minInclusive
|
788
750
|
# Applications MUST raise an error if both minInclusive and minExclusive are specified
|
789
|
-
|
751
|
+
log_error "#{type} cannot specify both minInclusive and minExclusive" if self.minExclusive
|
790
752
|
|
791
753
|
# Applications MUST raise an error if both minInclusive and maxInclusive are specified and maxInclusive is less than minInclusive
|
792
|
-
|
754
|
+
log_error "#{type} maxInclusive < minInclusive" if self.maxInclusive && self.maxInclusive < value
|
793
755
|
|
794
756
|
# Applications MUST raise an error if both minInclusive and maxExclusive are specified and maxExclusive is less than or equal to minInclusive
|
795
|
-
|
757
|
+
log_error "#{type} maxExclusive <= minInclusive" if self.maxExclusive && self.maxExclusive <= value
|
796
758
|
when :maxInclusive
|
797
759
|
# Applications MUST raise an error if both maxInclusive and maxExclusive are specified
|
798
|
-
|
760
|
+
log_error "#{type} cannot specify both maInclusive and maxExclusive" if self.maxExclusive
|
799
761
|
when :minExclusive
|
800
762
|
# Applications MUST raise an error if both minExclusive and maxExclusive are specified and maxExclusive is less than minExclusive
|
801
|
-
|
763
|
+
log_error "#{type} minExclusive < maxExclusive" if self.maxExclusive && self.maxExclusive < value
|
802
764
|
|
803
765
|
# Applications MUST raise an error if both minExclusive and maxInclusive are specified and maxInclusive is less than or equal to minExclusive
|
804
|
-
|
766
|
+
log_error "#{type} maxInclusive < minExclusive" if self.maxInclusive && self.maxInclusive <= value
|
805
767
|
end
|
806
768
|
else
|
807
|
-
|
769
|
+
log_error "#{type} has invalid property '#{key}': only allowed on numeric, date/time or duration datatypes"
|
808
770
|
end
|
809
771
|
when :notes
|
810
772
|
unless value.is_a?(Hash) || value.is_a?(Array)
|
811
|
-
|
773
|
+
log_error "#{type} has invalid property '#{key}': #{value}, Object or Array"
|
812
774
|
end
|
813
775
|
begin
|
814
776
|
normalize_jsonld(key, value)
|
815
777
|
rescue Error => e
|
816
|
-
|
778
|
+
log_error "#{type} has invalid content '#{key}': #{e.message}"
|
817
779
|
end
|
818
780
|
when :primaryKey, :rowTitles
|
819
781
|
# A column reference property that holds either a single reference to a column description object or an array of references.
|
820
782
|
"#{type} has invalid property '#{key}': no column references found" unless Array(value).length > 0
|
821
783
|
Array(value).each do |k|
|
822
784
|
unless self.columns.any? {|c| c[:name] == k}
|
823
|
-
|
785
|
+
log_warn "#{type} has invalid property '#{key}': column reference not found #{k}"
|
824
786
|
object.delete(key)
|
825
787
|
end
|
826
788
|
end
|
@@ -829,34 +791,33 @@ module RDF::Tabular
|
|
829
791
|
when :@id
|
830
792
|
# Must not be a BNode
|
831
793
|
if value.to_s.start_with?("_:")
|
832
|
-
|
794
|
+
log_error "#{type} has invalid property '#{key}': #{value.inspect}, must not start with '_:'"
|
833
795
|
end
|
834
796
|
|
835
797
|
# Datatype @id MUST NOT be the URL of a built-in type
|
836
798
|
if self.is_a?(Datatype) && DATATYPES.values.include?(value)
|
837
|
-
|
799
|
+
log_error "#{type} has invalid property '#{key}': #{value.inspect}, must not be the URL of a built-in datatype"
|
838
800
|
end
|
839
801
|
when :@type
|
840
802
|
# Must not be a BNode
|
841
803
|
if value.to_s.start_with?("_:")
|
842
|
-
|
804
|
+
log_error "#{type} has invalid property '@type': #{value.inspect}, must not start with '_:'"
|
843
805
|
end
|
844
806
|
case type
|
845
807
|
when :Transformation
|
846
|
-
|
808
|
+
log_error "#{type} has invalid property '@type': #{value.inspect}, expected #{type}" unless value.to_sym == :Template
|
847
809
|
else
|
848
|
-
|
810
|
+
log_error "#{type} has invalid property '@type': #{value.inspect}, expected #{type}" unless value.to_sym == type
|
849
811
|
end
|
850
812
|
when ->(k) {key.to_s.include?(':')}
|
851
813
|
begin
|
852
814
|
normalize_jsonld(key, value)
|
853
815
|
rescue Error => e
|
854
|
-
|
816
|
+
log_error "#{type} has invalid content '#{key}': #{e.message}"
|
855
817
|
end
|
856
818
|
end
|
857
819
|
end
|
858
820
|
|
859
|
-
raise Error, errors.join("\n") unless errors.empty?
|
860
821
|
self
|
861
822
|
end
|
862
823
|
|
@@ -885,10 +846,37 @@ module RDF::Tabular
|
|
885
846
|
# @param [:read] input
|
886
847
|
# @yield [Row]
|
887
848
|
def each_row(input)
|
888
|
-
csv =
|
889
|
-
|
890
|
-
|
891
|
-
|
849
|
+
csv, number, skipped = nil, 0, 0
|
850
|
+
path = input.base_uri.path rescue ""
|
851
|
+
if path.end_with?('.html') || input.respond_to?(:content_type) && input.content_type == 'text/html'
|
852
|
+
# Input is HTML; use fragment identifier to find table.
|
853
|
+
fragment = RDF::URI(self.url).fragment rescue nil
|
854
|
+
tab = begin
|
855
|
+
# Extract with nokogiri
|
856
|
+
require 'nokogiri' unless defined?(:Nokogiri)
|
857
|
+
doc = Nokogiri::HTML.parse(input)
|
858
|
+
doc.search("##{fragment}").first if fragment
|
859
|
+
rescue LoadError
|
860
|
+
# Extract with REXML
|
861
|
+
# FIXME
|
862
|
+
end
|
863
|
+
|
864
|
+
raise Error, "Expected to find HTML table identified by fragment identifer ##{fragment}" unless tab
|
865
|
+
|
866
|
+
# Use rows with <td> to create column data
|
867
|
+
csv = []
|
868
|
+
number = 0
|
869
|
+
tab.xpath('.//tr').map do |row|
|
870
|
+
number += 1 if row.xpath('th')
|
871
|
+
data = row.xpath('td').map(&:content)
|
872
|
+
csv << data unless data.empty?
|
873
|
+
end
|
874
|
+
else
|
875
|
+
csv = ::CSV.new(input, csv_options)
|
876
|
+
# Skip skipRows and headerRowCount
|
877
|
+
skipped = (dialect.skipRows.to_i + dialect.headerRowCount)
|
878
|
+
(1..skipped).each {csv.shift}
|
879
|
+
end
|
892
880
|
csv.each do |data|
|
893
881
|
# Check for embedded comments
|
894
882
|
if dialect.commentPrefix && data.first.to_s.start_with?(dialect.commentPrefix)
|
@@ -934,17 +922,17 @@ module RDF::Tabular
|
|
934
922
|
if value['@value']
|
935
923
|
dt = RDF::URI(context.expand_iri(value['@type'], vocab: true)) if value['@type']
|
936
924
|
lit = RDF::Literal(value['@value'], language: value['@language'], datatype: dt)
|
937
|
-
block.call(RDF::Statement
|
925
|
+
block.call(RDF::Statement(subject, property, lit))
|
938
926
|
else
|
939
927
|
# value MUST be a node object, establish a new subject from `@id`
|
940
928
|
s2 = value.has_key?('@id') ? context.expand_iri(value['@id']) : RDF::Node.new
|
941
929
|
|
942
930
|
# Generate a triple
|
943
|
-
block.call(RDF::Statement
|
931
|
+
block.call(RDF::Statement(subject, property, s2))
|
944
932
|
|
945
933
|
# Generate types
|
946
934
|
Array(value['@type']).each do |t|
|
947
|
-
block.call(RDF::Statement
|
935
|
+
block.call(RDF::Statement(s2, RDF.type, context.expand_iri(t, vocab: true)))
|
948
936
|
end
|
949
937
|
|
950
938
|
# Generate triples for all other properties
|
@@ -956,7 +944,7 @@ module RDF::Tabular
|
|
956
944
|
else
|
957
945
|
# Value is a primitive JSON value
|
958
946
|
lit = RDF::Literal(value)
|
959
|
-
block.call(RDF::Statement
|
947
|
+
block.call(RDF::Statement(subject, property, RDF::Literal(value)))
|
960
948
|
end
|
961
949
|
else
|
962
950
|
case value
|
@@ -1011,7 +999,7 @@ module RDF::Tabular
|
|
1011
999
|
if @options[:validate]
|
1012
1000
|
raise Error, "TableGroups must have Table with matching url #{tables.map(&:url).inspect} vs #{other.url.inspect}"
|
1013
1001
|
else
|
1014
|
-
|
1002
|
+
log_warn "TableGroups must have Table with matching url #{tables.map(&:url).inspect} vs #{other.url.inspect}"
|
1015
1003
|
end
|
1016
1004
|
end
|
1017
1005
|
else
|
@@ -1020,7 +1008,7 @@ module RDF::Tabular
|
|
1020
1008
|
if @options[:validate]
|
1021
1009
|
raise Error, "Tables must have the same url: #{url.inspect} vs #{other.url.inspect}}"
|
1022
1010
|
else
|
1023
|
-
|
1011
|
+
log_warn "Tables must have the same url: #{url.inspect} vs #{other.url.inspect}}"
|
1024
1012
|
end
|
1025
1013
|
end
|
1026
1014
|
|
@@ -1033,7 +1021,7 @@ module RDF::Tabular
|
|
1033
1021
|
if @options[:validate]
|
1034
1022
|
raise Error, "Columns must have the same number of non-virtual columns: #{non_virtual_columns.map(&:name).inspect} vs #{object_columns.map(&:name).inspect}"
|
1035
1023
|
else
|
1036
|
-
|
1024
|
+
log_warn "Columns must have the same number of non-virtual columns: #{non_virtual_columns.map(&:name).inspect} vs #{object_columns.map(&:name).inspect}"
|
1037
1025
|
|
1038
1026
|
# If present, a virtual column MUST appear after all other non-virtual column definitions
|
1039
1027
|
raise Error, "Virtual columns may not appear before non-virtual columns" unless Array(tableSchema.columns)[0..non_virtual_columns.length-1] == non_virtual_columns
|
@@ -1048,13 +1036,13 @@ module RDF::Tabular
|
|
1048
1036
|
end
|
1049
1037
|
index = 0
|
1050
1038
|
object_columns.all? do |cb|
|
1051
|
-
ca = non_virtual_columns[index] || Column.new({})
|
1039
|
+
ca = non_virtual_columns[index] || Column.new({}, @options)
|
1052
1040
|
ta = ca.titles || {}
|
1053
1041
|
tb = cb.titles || {}
|
1054
1042
|
if !ca.object.has_key?(:name) && !cb.object.has_key?(:name) && ta.empty? && tb.empty?
|
1055
1043
|
true
|
1056
1044
|
elsif ca.object.has_key?(:name) && cb.object.has_key?(:name)
|
1057
|
-
raise Error, "Columns don't match
|
1045
|
+
raise Error, "Columns don't match on name: #{ca.name}, #{cb.name}" unless ca.name == cb.name
|
1058
1046
|
elsif @options[:validate] || !ta.empty? && !tb.empty?
|
1059
1047
|
# If validating, column compatibility requires strict match between titles
|
1060
1048
|
titles_match = case
|
@@ -1078,10 +1066,10 @@ module RDF::Tabular
|
|
1078
1066
|
true
|
1079
1067
|
elsif !@options[:validate]
|
1080
1068
|
# If not validating, columns don't match, but processing continues
|
1081
|
-
|
1069
|
+
log_warn "Columns don't match on titles: #{ca.titles.inspect} vs #{cb.titles.inspect}"
|
1082
1070
|
true
|
1083
1071
|
else
|
1084
|
-
raise Error, "Columns don't match
|
1072
|
+
raise Error, "Columns don't match on titles: #{ca.titles.inspect} vs #{cb.titles.inspect}"
|
1085
1073
|
end
|
1086
1074
|
end
|
1087
1075
|
index += 1
|
@@ -1175,13 +1163,13 @@ module RDF::Tabular
|
|
1175
1163
|
when Hash
|
1176
1164
|
if value['@value']
|
1177
1165
|
if !(value.keys.sort - %w(@value @type @language)).empty?
|
1178
|
-
|
1166
|
+
log_error "Value object may not contain keys other than @value, @type, or @language: #{value.to_json}"
|
1179
1167
|
elsif (value.keys.sort & %w(@language @type)) == %w(@language @type)
|
1180
|
-
|
1168
|
+
log_error "Value object may not contain both @type and @language: #{value.to_json}"
|
1181
1169
|
elsif value['@language'] && !BCP47::Language.identify(value['@language'].to_s)
|
1182
|
-
|
1170
|
+
log_error "Value object with @language must use valid language: #{value.to_json}"
|
1183
1171
|
elsif value['@type'] && (value['@type'].start_with?('_:') || !context.expand_iri(value['@type'], vocab: true).absolute?)
|
1184
|
-
|
1172
|
+
log_error "Value object with @type must defined type: #{value.to_json}"
|
1185
1173
|
end
|
1186
1174
|
value
|
1187
1175
|
else
|
@@ -1190,16 +1178,16 @@ module RDF::Tabular
|
|
1190
1178
|
case k
|
1191
1179
|
when "@id"
|
1192
1180
|
nv[k] = context.expand_iri(v, documentRelative: true).to_s
|
1193
|
-
|
1181
|
+
log_error "Invalid use of explicit BNode on @id" if nv[k].start_with?('_:')
|
1194
1182
|
when "@type"
|
1195
1183
|
Array(v).each do |vv|
|
1196
1184
|
# Validate that all type values transform to absolute IRIs
|
1197
1185
|
resource = context.expand_iri(vv, vocab: true)
|
1198
|
-
|
1186
|
+
log_error "Invalid type #{vv} in JSON-LD context" unless resource.is_a?(RDF::URI) && resource.absolute?
|
1199
1187
|
end
|
1200
1188
|
nv[k] = v
|
1201
1189
|
when /^(@|_:)/
|
1202
|
-
|
1190
|
+
log_error "Invalid use of #{k} in JSON-LD content"
|
1203
1191
|
else
|
1204
1192
|
nv[k] = normalize_jsonld(k, v)
|
1205
1193
|
end
|
@@ -1212,15 +1200,9 @@ module RDF::Tabular
|
|
1212
1200
|
end
|
1213
1201
|
protected
|
1214
1202
|
|
1215
|
-
# Add a warning on this object
|
1216
|
-
def warn(string)
|
1217
|
-
debug("warn: #{string}")
|
1218
|
-
(@warnings ||= []) << string
|
1219
|
-
end
|
1220
|
-
|
1221
1203
|
def set_property(key, type, value, invalid)
|
1222
1204
|
if invalid
|
1223
|
-
|
1205
|
+
log_warn "#{type} has invalid property '#{key}' (#{value.inspect}): expected #{invalid}"
|
1224
1206
|
case type
|
1225
1207
|
when :link, :uri_template
|
1226
1208
|
object[key] = ""
|
@@ -1264,12 +1246,12 @@ module RDF::Tabular
|
|
1264
1246
|
end
|
1265
1247
|
end
|
1266
1248
|
else
|
1267
|
-
|
1249
|
+
log_warn "#{type} has invalid property '#{key}': expected array of #{klass}"
|
1268
1250
|
[]
|
1269
1251
|
end
|
1270
1252
|
|
1271
1253
|
unless object[key].all? {|v| v.is_a?(klass)}
|
1272
|
-
|
1254
|
+
log_warn "#{type} has invalid property '#{key}': expected array of #{klass}"
|
1273
1255
|
# Remove elements that aren't of the right types
|
1274
1256
|
object[key] = object[key].select! {|v| v.is_a?(klass)}
|
1275
1257
|
end
|
@@ -1298,14 +1280,13 @@ module RDF::Tabular
|
|
1298
1280
|
end
|
1299
1281
|
|
1300
1282
|
class DebugContext
|
1301
|
-
include
|
1302
|
-
|
1303
|
-
|
1304
|
-
|
1305
|
-
end
|
1283
|
+
include RDF::Util::Logger
|
1284
|
+
end
|
1285
|
+
def self.log_debug(*args, &block)
|
1286
|
+
DebugContext.new.log_debug(*args, &block)
|
1306
1287
|
end
|
1307
|
-
def self.
|
1308
|
-
DebugContext.new(*args
|
1288
|
+
def self.log_warn(*args)
|
1289
|
+
DebugContext.new.log_warn(*args)
|
1309
1290
|
end
|
1310
1291
|
end
|
1311
1292
|
|
@@ -1453,7 +1434,7 @@ module RDF::Tabular
|
|
1453
1434
|
content['@context'] = object.delete(:@context) if object[:@context]
|
1454
1435
|
ctx = @context
|
1455
1436
|
remove_instance_variable(:@context) if instance_variables.include?(:@context)
|
1456
|
-
tg = TableGroup.new(content, context: ctx, filenames: @filenames, base: base)
|
1437
|
+
tg = TableGroup.new(content, @options.merge(context: ctx, filenames: @filenames, base: base))
|
1457
1438
|
@parent = tg # Link from parent
|
1458
1439
|
tg
|
1459
1440
|
end
|
@@ -1464,8 +1445,7 @@ module RDF::Tabular
|
|
1464
1445
|
"@id" => (id.to_s if id),
|
1465
1446
|
"@type" => "AnnotatedTable",
|
1466
1447
|
"url" => self.url.to_s,
|
1467
|
-
"
|
1468
|
-
"rows" => []
|
1448
|
+
"tableSchema" => (tableSchema.to_atd if tableSchema),
|
1469
1449
|
}) do |memo, (k, v)|
|
1470
1450
|
memo[k.to_s] ||= v
|
1471
1451
|
memo
|
@@ -1519,12 +1499,12 @@ module RDF::Tabular
|
|
1519
1499
|
end
|
1520
1500
|
end
|
1521
1501
|
else
|
1522
|
-
|
1502
|
+
log_warn "#{type} has invalid property 'columns': expected array of Column"
|
1523
1503
|
[]
|
1524
1504
|
end
|
1525
1505
|
|
1526
1506
|
unless object[:columns].all? {|v| v.is_a?(Column)}
|
1527
|
-
|
1507
|
+
log_warn "#{type} has invalid property 'columns': expected array of Column"
|
1528
1508
|
# Remove elements that aren't of the right types
|
1529
1509
|
object[:columns] = object[:columns].select! {|v| v.is_a?(Column)}
|
1530
1510
|
end
|
@@ -1534,12 +1514,12 @@ module RDF::Tabular
|
|
1534
1514
|
object[:foreignKeys] = case value
|
1535
1515
|
when Array then value
|
1536
1516
|
else
|
1537
|
-
|
1517
|
+
log_warn "#{type} has invalid property 'foreignKeys': expected array of ForeignKey"
|
1538
1518
|
[]
|
1539
1519
|
end
|
1540
1520
|
|
1541
1521
|
unless object[:foreignKeys].all? {|v| v.is_a?(Hash)}
|
1542
|
-
|
1522
|
+
log_warn "#{type} has invalid property 'foreignKeys': expected array of ForeignKey"
|
1543
1523
|
# Remove elements that aren't of the right types
|
1544
1524
|
object[:foreignKeys] = object[:foreignKeys].select! {|v| v.is_a?(Hash)}
|
1545
1525
|
end
|
@@ -1562,6 +1542,18 @@ module RDF::Tabular
|
|
1562
1542
|
end
|
1563
1543
|
end
|
1564
1544
|
end
|
1545
|
+
|
1546
|
+
# Return Annotated Table representation
|
1547
|
+
def to_atd
|
1548
|
+
object.inject({
|
1549
|
+
"@id" => (id.to_s if id),
|
1550
|
+
"@type" => "Schema",
|
1551
|
+
"columns" => Array(columns).map(&:to_atd),
|
1552
|
+
}) do |memo, (k, v)|
|
1553
|
+
memo[k.to_s] ||= v
|
1554
|
+
memo
|
1555
|
+
end.delete_if {|k,v| v.nil? || v.is_a?(Metadata)}
|
1556
|
+
end
|
1565
1557
|
end
|
1566
1558
|
|
1567
1559
|
class Column < Metadata
|
@@ -1650,7 +1642,6 @@ module RDF::Tabular
|
|
1650
1642
|
"table" => (table.id.to_s if table.id),
|
1651
1643
|
"number" => self.number,
|
1652
1644
|
"sourceNumber" => self.sourceNumber,
|
1653
|
-
"cells" => [],
|
1654
1645
|
"virtual" => self.virtual,
|
1655
1646
|
"name" => self.name,
|
1656
1647
|
"titles" => self.titles
|
@@ -1701,7 +1692,7 @@ module RDF::Tabular
|
|
1701
1692
|
class Dialect < Metadata
|
1702
1693
|
# Defaults for dialects
|
1703
1694
|
DEFAULTS = {
|
1704
|
-
commentPrefix:
|
1695
|
+
commentPrefix: false,
|
1705
1696
|
delimiter: ",".freeze,
|
1706
1697
|
doubleQuote: true,
|
1707
1698
|
encoding: "utf-8".freeze,
|
@@ -1814,38 +1805,75 @@ module RDF::Tabular
|
|
1814
1805
|
lang ||= 'und'
|
1815
1806
|
|
1816
1807
|
# Set encoding on input
|
1817
|
-
|
1818
|
-
(
|
1819
|
-
|
1820
|
-
|
1821
|
-
|
1822
|
-
|
1823
|
-
|
1824
|
-
|
1825
|
-
|
1826
|
-
|
1827
|
-
|
1828
|
-
|
1829
|
-
|
1830
|
-
row_data = Array(csv.shift)
|
1831
|
-
Array(row_data).each_with_index do |value, index|
|
1832
|
-
# Skip columns
|
1833
|
-
skipCols = skipColumns.to_i
|
1834
|
-
next if index < skipCols
|
1808
|
+
path = input.base_uri.path rescue ""
|
1809
|
+
if path.end_with?('.html') || input.respond_to?(:content_type) && input.content_type == 'text/html'
|
1810
|
+
# Input is HTML; use fragment identfier to find table.
|
1811
|
+
fragment = RDF::URI(table["url"]).fragment rescue nil
|
1812
|
+
tab = begin
|
1813
|
+
# Extract with nokogiri
|
1814
|
+
require 'nokogiri' unless defined?(:Nokogiri)
|
1815
|
+
doc = Nokogiri::HTML.parse(input)
|
1816
|
+
doc.search("##{fragment}").first if fragment
|
1817
|
+
rescue LoadError
|
1818
|
+
# Extract with REXML
|
1819
|
+
# FIXME
|
1820
|
+
end
|
1835
1821
|
|
1822
|
+
raise Error, "Expected to find HTML table identified by fragment identifer ##{fragment}" unless tab
|
1823
|
+
|
1824
|
+
# Use rows with <th> to create column titles
|
1825
|
+
tab.xpath('.//tr').each do |row|
|
1826
|
+
row.xpath('th').map(&:content).each_with_index do |value, index|
|
1827
|
+
# Skip columns
|
1828
|
+
skipCols = skipColumns.to_i
|
1829
|
+
next if index < skipCols
|
1830
|
+
|
1831
|
+
# Trim value
|
1832
|
+
value.lstrip! if %w(true start).include?(trim.to_s)
|
1833
|
+
value.rstrip! if %w(true end).include?(trim.to_s)
|
1834
|
+
|
1835
|
+
# Initialize titles
|
1836
|
+
columns = table["tableSchema"]["columns"] ||= []
|
1837
|
+
column = columns[index - skipCols] ||= {
|
1838
|
+
"titles" => {lang => []},
|
1839
|
+
}
|
1840
|
+
column["titles"][lang] << value
|
1841
|
+
end
|
1842
|
+
end
|
1843
|
+
else
|
1844
|
+
csv = ::CSV.new(input, csv_options)
|
1845
|
+
(1..skipRows.to_i).each do
|
1846
|
+
value = csv.shift.join(delimiter) # Skip initial lines, these form comment annotations
|
1836
1847
|
# Trim value
|
1837
1848
|
value.lstrip! if %w(true start).include?(trim.to_s)
|
1838
1849
|
value.rstrip! if %w(true end).include?(trim.to_s)
|
1839
1850
|
|
1840
|
-
|
1841
|
-
|
1842
|
-
|
1843
|
-
|
1844
|
-
|
1845
|
-
|
1851
|
+
value = value[1..-1].strip if commentPrefix && value.start_with?(commentPrefix)
|
1852
|
+
(metadata["rdfs:comment"] ||= []) << value unless value.empty?
|
1853
|
+
end
|
1854
|
+
log_debug("embedded_metadata") {"notes: #{table["notes"].inspect}"}
|
1855
|
+
|
1856
|
+
(1..headerRowCount).each do
|
1857
|
+
row_data = Array(csv.shift)
|
1858
|
+
Array(row_data).each_with_index do |value, index|
|
1859
|
+
# Skip columns
|
1860
|
+
skipCols = skipColumns.to_i
|
1861
|
+
next if index < skipCols
|
1862
|
+
|
1863
|
+
# Trim value
|
1864
|
+
value.lstrip! if %w(true start).include?(trim.to_s)
|
1865
|
+
value.rstrip! if %w(true end).include?(trim.to_s)
|
1866
|
+
|
1867
|
+
# Initialize titles
|
1868
|
+
columns = table["tableSchema"]["columns"] ||= []
|
1869
|
+
column = columns[index - skipCols] ||= {
|
1870
|
+
"titles" => {lang => []},
|
1871
|
+
}
|
1872
|
+
column["titles"][lang] << value
|
1873
|
+
end
|
1846
1874
|
end
|
1847
1875
|
end
|
1848
|
-
|
1876
|
+
log_debug("embedded_metadata") {"table: #{table.inspect}"}
|
1849
1877
|
input.rewind if input.respond_to?(:rewind)
|
1850
1878
|
|
1851
1879
|
Table.new(table, options.merge(reason: "load embedded metadata: #{table['@id']}"))
|
@@ -2030,13 +2058,13 @@ module RDF::Tabular
|
|
2030
2058
|
|
2031
2059
|
# create column if necessary
|
2032
2060
|
columns[index - skipColumns] ||=
|
2033
|
-
Column.new({}, table: metadata, parent: metadata.tableSchema, number: index + 1 - skipColumns)
|
2061
|
+
Column.new({}, options.merge(table: metadata, parent: metadata.tableSchema, number: index + 1 - skipColumns))
|
2034
2062
|
|
2035
2063
|
column = columns[index - skipColumns]
|
2036
2064
|
|
2037
2065
|
@values << cell = Cell.new(metadata, column, self, value)
|
2038
2066
|
|
2039
|
-
datatype = column.datatype || Datatype.new({base: "string"}, parent: column)
|
2067
|
+
datatype = column.datatype || Datatype.new({base: "string"}, options.merge(parent: column))
|
2040
2068
|
value = value.gsub(/\r\n\t/, ' ') unless %w(string json xml html anyAtomicType).include?(datatype.base)
|
2041
2069
|
value = value.strip.gsub(/\s+/, ' ') unless %w(string json xml html anyAtomicType normalizedString).include?(datatype.base)
|
2042
2070
|
# if the resulting string is an empty string, apply the remaining steps to the string given by the default property
|
@@ -2092,7 +2120,11 @@ module RDF::Tabular
|
|
2092
2120
|
|
2093
2121
|
# Identifier for this row, as an RFC7111 fragment
|
2094
2122
|
# @return [RDF::URI]
|
2095
|
-
def id;
|
2123
|
+
def id;
|
2124
|
+
u = table.url.dup
|
2125
|
+
u.fragment = "row=#{self.sourceNumber}"
|
2126
|
+
u
|
2127
|
+
end
|
2096
2128
|
|
2097
2129
|
# Return Annotated Row representation
|
2098
2130
|
def to_atd
|