rdf-tabular 1.0.0 → 3.1.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (83) hide show
  1. checksums.yaml +5 -5
  2. data/README.md +62 -44
  3. data/UNLICENSE +1 -1
  4. data/VERSION +1 -1
  5. data/etc/csvw.jsonld +135 -50
  6. data/etc/doap.csv +1 -1
  7. data/etc/doap.csv-metadata.json +1 -1
  8. data/etc/doap.ttl +14 -13
  9. data/etc/earl.html +648 -648
  10. data/etc/earl.jsonld +691 -691
  11. data/etc/earl.ttl +846 -846
  12. data/lib/rdf/tabular.rb +3 -3
  13. data/lib/rdf/tabular/csvw.rb +626 -182
  14. data/lib/rdf/tabular/format.rb +8 -6
  15. data/lib/rdf/tabular/literal.rb +1 -1
  16. data/lib/rdf/tabular/metadata.rb +63 -58
  17. data/lib/rdf/tabular/reader.rb +32 -20
  18. data/lib/rdf/tabular/uax35.rb +1 -1
  19. data/spec/data/countries-minimal.json +38 -0
  20. data/spec/data/countries-minimal.ttl +36 -0
  21. data/spec/data/countries-standard.json +86 -0
  22. data/spec/data/countries-standard.ttl +75 -0
  23. data/spec/data/countries.csv +4 -0
  24. data/spec/data/countries.csv-minimal.json +16 -0
  25. data/spec/data/countries.csv-minimal.ttl +19 -0
  26. data/spec/data/countries.csv-standard.json +33 -0
  27. data/spec/data/countries.csv-standard.ttl +44 -0
  28. data/spec/data/countries.html +88 -0
  29. data/spec/data/countries.json +53 -0
  30. data/spec/data/countries_embed-minimal.json +38 -0
  31. data/spec/data/countries_embed-minimal.ttl +36 -0
  32. data/spec/data/countries_embed-standard.json +86 -0
  33. data/spec/data/countries_embed-standard.ttl +75 -0
  34. data/spec/data/countries_embed.html +88 -0
  35. data/spec/data/countries_html-minimal.json +38 -0
  36. data/spec/data/countries_html-minimal.ttl +36 -0
  37. data/spec/data/countries_html-standard.json +86 -0
  38. data/spec/data/countries_html-standard.ttl +75 -0
  39. data/spec/data/country-codes-and-names-minimal.json +19 -0
  40. data/spec/data/country-codes-and-names-minimal.ttl +22 -0
  41. data/spec/data/country-codes-and-names-standard.json +47 -0
  42. data/spec/data/country-codes-and-names-standard.ttl +45 -0
  43. data/spec/data/country-codes-and-names.csv +5 -0
  44. data/spec/data/country_slice.csv +4 -0
  45. data/spec/data/junior-roles.csv +3 -0
  46. data/spec/data/junior-roles.json +54 -0
  47. data/spec/data/roles-minimal.json +32 -0
  48. data/spec/data/roles-minimal.ttl +36 -0
  49. data/spec/data/roles-standard.json +56 -0
  50. data/spec/data/roles-standard.ttl +66 -0
  51. data/spec/data/roles.json +23 -0
  52. data/spec/data/senior-roles.csv +3 -0
  53. data/spec/data/senior-roles.json +52 -0
  54. data/spec/data/test232-metadata.json +10 -0
  55. data/spec/data/test232.csv +3 -0
  56. data/spec/data/tree-ops-atd.json +1 -0
  57. data/spec/data/tree-ops-ext-minimal.json +42 -0
  58. data/spec/data/tree-ops-ext-minimal.ttl +34 -0
  59. data/spec/data/tree-ops-ext-standard.json +93 -0
  60. data/spec/data/tree-ops-ext-standard.ttl +82 -0
  61. data/spec/data/tree-ops-ext.csv +4 -0
  62. data/spec/data/tree-ops-ext.json +81 -0
  63. data/spec/data/tree-ops-minimal.json +18 -0
  64. data/spec/data/tree-ops-minimal.ttl +14 -0
  65. data/spec/data/tree-ops-standard.json +44 -0
  66. data/spec/data/tree-ops-standard.ttl +44 -0
  67. data/spec/data/tree-ops-virtual-minimal.json +32 -0
  68. data/spec/data/tree-ops-virtual-minimal.ttl +25 -0
  69. data/spec/data/tree-ops-virtual-standard.json +49 -0
  70. data/spec/data/tree-ops-virtual-standard.ttl +49 -0
  71. data/spec/data/tree-ops-virtual.json +48 -0
  72. data/spec/data/tree-ops.csv +3 -0
  73. data/spec/data/tree-ops.csv-metadata.json +43 -0
  74. data/spec/data/tree-ops.html +54 -0
  75. data/spec/data/tree-ops.tsv +3 -0
  76. data/spec/format_spec.rb +1 -1
  77. data/spec/metadata_spec.rb +85 -8
  78. data/spec/reader_spec.rb +2 -2
  79. data/spec/suite_helper.rb +2 -2
  80. data/spec/suite_spec.rb +4 -5
  81. metadata +147 -37
  82. data/lib/rdf/tabular/json.rb +0 -0
  83. data/spec/w3c-csvw +0 -1
@@ -22,7 +22,7 @@ module RDF::Tabular
22
22
  # @example Obtaining serialization format file extension mappings
23
23
  # RDF::Format.file_extensions #=> {:csv => "text/csv"}
24
24
  #
25
- # @see http://www.w3.org/TR/rdf-testcases/#ntriples
25
+ # @see https://www.w3.org/TR/rdf-testcases/#ntriples
26
26
  class Format < RDF::Format
27
27
  content_type 'text/csv;q=0.4',
28
28
  extensions: [:csv, :tsv],
@@ -52,15 +52,17 @@ module RDF::Tabular
52
52
  # @return [Hash{Symbol => Lambda(Array, Hash)}]
53
53
  def self.cli_commands
54
54
  {
55
- :"tabular-json" => {
56
- description: "Generate tabular json output, rather than RDF for Tabular data",
57
- help: "tabulary-json --input-format tabular files ...",
58
- prase: false,
55
+ "tabular-json": {
56
+ description: "Serialize using tabular JSON",
57
+ parse: false,
58
+ filter: {format: :tabular}, # Only shows output format set
59
+ option_use: {output_format: :disabled},
60
+ help: "tabular-json --input-format tabular files ...\nGenerate tabular JSON output, rather than RDF for Tabular data",
59
61
  lambda: ->(argv, opts) do
60
62
  raise ArgumentError, "Outputting Tabular JSON only allowed when input format is tabular." unless opts[:format] == :tabular
61
63
  out = opts[:output] || $stdout
62
64
  out.set_encoding(Encoding::UTF_8) if RUBY_PLATFORM == "java"
63
- RDF::CLI.parse(argv, opts) do |reader|
65
+ RDF::CLI.parse(argv, **opts) do |reader|
64
66
  out.puts reader.to_json
65
67
  end
66
68
  end
@@ -13,7 +13,7 @@ module RDF::Tabular
13
13
  ##
14
14
  # @param [Object] value
15
15
  # @option options [String] :lexical (nil)
16
- def initialize(value, options = {})
16
+ def initialize(value, **options)
17
17
  @datatype = options[:datatype] || DATATYPE
18
18
  @string = options[:lexical] if options.has_key?(:lexical)
19
19
  if value.is_a?(String)
@@ -16,7 +16,7 @@ require 'yaml' # used by BCP47, which should have required it.
16
16
  # * Return Column-level annotations
17
17
  # * Return row iterator with column information
18
18
  #
19
- # @author [Gregg Kellogg](http://greggkellogg.net/)
19
+ # @author [Gregg Kellogg](https://greggkellogg.net/)
20
20
  module RDF::Tabular
21
21
  class Metadata
22
22
  include RDF::Util::Logger
@@ -136,15 +136,15 @@ module RDF::Tabular
136
136
  # see `RDF::Util::File.open_file` in RDF.rb and {new}
137
137
  # @yield [Metadata]
138
138
  # @raise [IOError] if file not found
139
- def self.open(path, options = {})
139
+ def self.open(path, **options)
140
140
  options = options.merge(
141
141
  headers: {
142
142
  'Accept' => 'application/ld+json, application/json'
143
143
  }
144
144
  )
145
145
  path = "file:" + path unless path =~ /^\w+:/
146
- RDF::Util::File.open_file(path, options) do |file|
147
- self.new(file, options.merge(base: path, filenames: path))
146
+ RDF::Util::File.open_file(path, **options) do |file|
147
+ self.new(file, **options.merge(base: path, filenames: path))
148
148
  end
149
149
  end
150
150
 
@@ -173,16 +173,16 @@ module RDF::Tabular
173
173
  # @option options [RDF::URI] :base
174
174
  # The Base URL to use when expanding the document. This overrides the value of `input` if it is a URL. If not specified and `input` is not an URL, the base URL defaults to the current document URL if in a browser context, or the empty string if there is no document context.
175
175
  # @return [Metadata]
176
- def self.for_input(input, options = {})
176
+ def self.for_input(input, **options)
177
177
  base = options[:base]
178
178
 
179
179
  # Use user metadata, if provided
180
180
  metadata = case options[:metadata]
181
181
  when Metadata then options[:metadata]
182
182
  when Hash
183
- Metadata.new(options[:metadata], options.merge(reason: "load user metadata: #{options[:metadata].inspect}"))
183
+ Metadata.new(options[:metadata], **options.merge(reason: "load user metadata: #{options[:metadata].inspect}"))
184
184
  when String, RDF::URI
185
- Metadata.open(options[:metadata], options.merge(filenames: options[:metadata], reason: "load user metadata: #{options[:metadata].inspect}"))
185
+ Metadata.open(options[:metadata], **options.merge(filenames: options[:metadata], reason: "load user metadata: #{options[:metadata].inspect}"))
186
186
  end
187
187
 
188
188
  # Search for metadata until found
@@ -191,13 +191,13 @@ module RDF::Tabular
191
191
  if !metadata && input.respond_to?(:links) &&
192
192
  link = input.links.find_link(%w(rel describedby))
193
193
  link_loc = RDF::URI(base).join(link.href).to_s
194
- md = Metadata.open(link_loc, options.merge(filenames: link_loc, reason: "load linked metadata: #{link_loc}"))
194
+ md = Metadata.open(link_loc, **options.merge(filenames: link_loc, reason: "load linked metadata: #{link_loc}"))
195
195
  if md
196
196
  # Metadata must describe file to be useful
197
197
  if md.describes_file?(base)
198
198
  metadata = md
199
199
  else
200
- log_warn("Found metadata at #{link_loc}, which does not describe #{base}, ignoring", options)
200
+ log_warn("Found metadata at #{link_loc}, which does not describe #{base}, ignoring", **options)
201
201
  end
202
202
  end
203
203
  end
@@ -206,28 +206,30 @@ module RDF::Tabular
206
206
  # If we still don't have metadata, load the site-wide configuration file and use templates found there as locations
207
207
  if !metadata && base
208
208
  templates = site_wide_config(base)
209
- log_debug("for_input", options) {"templates: #{templates.map(&:to_s).inspect}"}
209
+ log_debug("for_input", **options) {"templates: #{templates.map(&:to_s).inspect}"}
210
210
  locs = templates.map do |template|
211
211
  t = Addressable::Template.new(template)
212
- RDF::URI(base).join(t.expand(url: base).to_s)
212
+ mapped = t.expand(url: base).to_s
213
+ mapped = RDF::URI.decode(mapped) if options[:decode_uri]
214
+ RDF::URI(base).join(mapped)
213
215
  end
214
- log_debug("for_input", options) {"locs: #{locs.map(&:to_s).inspect}"}
216
+ log_debug("for_input", **options) {"locs: #{locs.map(&:to_s).inspect}"}
215
217
 
216
218
  locs.each do |loc|
217
219
  metadata ||= begin
218
- md = Metadata.open(loc, options.merge(filenames: loc, reason: "load found metadata: #{loc}"))
220
+ md = Metadata.open(loc, **options.merge(filenames: loc, reason: "load found metadata: #{loc}"))
219
221
  # Metadata must describe file to be useful
220
222
  if md
221
223
  # Metadata must describe file to be useful
222
224
  if md.describes_file?(base)
223
225
  md
224
226
  else
225
- log_warn("Found metadata at #{loc}, which does not describe #{base}, ignoring", options)
227
+ log_warn("Found metadata at #{loc}, which does not describe #{base}, ignoring", **options)
226
228
  nil
227
229
  end
228
230
  end
229
231
  rescue IOError
230
- log_debug("for_input", options) {"failed to load found metadata #{loc}: #{$!}"}
232
+ log_debug("for_input", **options) {"failed to load found metadata #{loc}: #{$!}"}
231
233
  nil
232
234
  end
233
235
  end
@@ -236,8 +238,8 @@ module RDF::Tabular
236
238
  # Return either the merge or user- and found-metadata, any of these, or an empty TableGroup
237
239
  metadata = case
238
240
  when metadata then metadata
239
- when base then TableGroup.new({"@context" => "http://www.w3.org/ns/csvw", tables: [{url: base}]}, options)
240
- else TableGroup.new({"@context" => "http://www.w3.org/ns/csvw", tables: [{url: nil}]}, options)
241
+ when base then TableGroup.new({"@context" => "http://www.w3.org/ns/csvw", tables: [{url: base}]}, **options)
242
+ else TableGroup.new({"@context" => "http://www.w3.org/ns/csvw", tables: [{url: nil}]}, **options)
241
243
  end
242
244
 
243
245
  # Make TableGroup, if not already
@@ -246,7 +248,7 @@ module RDF::Tabular
246
248
 
247
249
  ##
248
250
  # @private
249
- def self.new(input, options = {})
251
+ def self.new(input, **options)
250
252
  # Triveal case
251
253
  return input if input.is_a?(Metadata)
252
254
 
@@ -297,7 +299,7 @@ module RDF::Tabular
297
299
  end
298
300
 
299
301
  md = klass.allocate
300
- md.send(:initialize, object, options)
302
+ md.send(:initialize, object, **options)
301
303
  md
302
304
  rescue ::JSON::ParserError
303
305
  raise Error, "Expected input to be a JSON Object"
@@ -314,11 +316,13 @@ module RDF::Tabular
314
316
  # Context used for this metadata. Taken from input if not provided
315
317
  # @option options [RDF::URI] :base
316
318
  # The Base URL to use when expanding the document. This overrides the value of `input` if it is a URL. If not specified and `input` is not an URL, the base URL defaults to the current document URL if in a browser context, or the empty string if there is no document context.
319
+ # @option options [Boolean] :decode_uri
320
+ # Decode %-encodings in the result of a URI Template operation.
317
321
  # @option options [Boolean] :normalize normalize the object
318
322
  # @option options [Boolean] :validate Strict metadata validation
319
323
  # @raise [Error]
320
324
  # @return [Metadata]
321
- def initialize(input, options = {})
325
+ def initialize(input, **options)
322
326
  @options = options.dup
323
327
 
324
328
  # Parent of this Metadata, if any
@@ -467,16 +471,16 @@ module RDF::Tabular
467
471
  object[:tableSchema] = case value
468
472
  when String
469
473
  link = context.base.join(value).to_s
470
- md = Schema.open(link, @options.merge(parent: self, context: nil, normalize: true))
474
+ md = Schema.open(link, **@options.merge(parent: self, context: nil, normalize: true))
471
475
  md[:@id] ||= link
472
476
  md
473
477
  when Hash
474
- Schema.new(value, @options.merge(parent: self, context: nil))
478
+ Schema.new(value, **@options.merge(parent: self, context: nil))
475
479
  when Schema
476
480
  value
477
481
  else
478
482
  log_warn "#{type} has invalid property 'tableSchema' (#{value.inspect}): expected a URL or object"
479
- Schema.new({}, @options.merge(parent: self, context: nil))
483
+ Schema.new({}, **@options.merge(parent: self, context: nil))
480
484
  end
481
485
  end
482
486
 
@@ -491,7 +495,7 @@ module RDF::Tabular
491
495
  when object[:dialect] then object[:dialect]
492
496
  when parent then parent.dialect
493
497
  when is_a?(Table) || is_a?(TableGroup)
494
- d = Dialect.new({}, @options.merge(parent: self, context: nil))
498
+ d = Dialect.new({}, **@options.merge(parent: self, context: nil))
495
499
  self.dialect = d unless self.parent
496
500
  d
497
501
  else
@@ -514,11 +518,11 @@ module RDF::Tabular
514
518
  @dialect = object[:dialect] = case value
515
519
  when String
516
520
  link = context.base.join(value).to_s
517
- md = Metadata.open(link, @options.merge(parent: self, context: nil, normalize: true))
521
+ md = Metadata.open(link, **@options.merge(parent: self, context: nil, normalize: true))
518
522
  md[:@id] ||= link
519
523
  md
520
524
  when Hash
521
- Dialect.new(value, @options.merge(parent: self, context: nil))
525
+ Dialect.new(value, **@options.merge(parent: self, context: nil))
522
526
  when Dialect
523
527
  value
524
528
  else
@@ -532,8 +536,8 @@ module RDF::Tabular
532
536
  # @raise [Error] if datatype is not valid
533
537
  def datatype=(value)
534
538
  val = case value
535
- when Hash then Datatype.new(value, @options.merge(parent: self))
536
- else Datatype.new({base: value}, @options.merge(parent: self))
539
+ when Hash then Datatype.new(value, **@options.merge(parent: self))
540
+ else Datatype.new({base: value}, **@options.merge(parent: self))
537
541
  end
538
542
 
539
543
  if val.valid? || value.is_a?(Hash)
@@ -564,7 +568,7 @@ module RDF::Tabular
564
568
  end
565
569
 
566
570
  ##
567
- # Validate metadata, raising an error containing all errors detected during validation
571
+ # Validate metadata and content, raising an error containing all errors detected during validation
568
572
  # @raise [Error] Raise error if metadata has any unexpected properties
569
573
  # @return [self]
570
574
  def validate
@@ -872,7 +876,7 @@ module RDF::Tabular
872
876
  csv << data unless data.empty?
873
877
  end
874
878
  else
875
- csv = ::CSV.new(input, csv_options)
879
+ csv = ::CSV.new(input, **csv_options)
876
880
  # Skip skipRows and headerRowCount
877
881
  skipped = (dialect.skipRows.to_i + dialect.headerRowCount)
878
882
  (1..skipped).each {csv.shift}
@@ -891,7 +895,7 @@ module RDF::Tabular
891
895
  next
892
896
  end
893
897
  number += 1
894
- row = Row.new(data, self, number, number + skipped, @options)
898
+ row = Row.new(data, self, number, number + skipped, **@options)
895
899
  (self.object[:rows] ||= []) << row if @options[:validate] # Keep track of rows when validating
896
900
  yield(row)
897
901
  end
@@ -1036,13 +1040,13 @@ module RDF::Tabular
1036
1040
  end
1037
1041
  index = 0
1038
1042
  object_columns.all? do |cb|
1039
- ca = non_virtual_columns[index] || Column.new({}, @options)
1043
+ ca = non_virtual_columns[index] || Column.new({}, **@options)
1040
1044
  ta = ca.titles || {}
1041
1045
  tb = cb.titles || {}
1042
1046
  if !ca.object.has_key?(:name) && !cb.object.has_key?(:name) && ta.empty? && tb.empty?
1043
1047
  true
1044
1048
  elsif ca.object.has_key?(:name) && cb.object.has_key?(:name)
1045
- raise Error, "Columns don't match on name: #{ca.name}, #{cb.name}" unless ca.name == cb.name
1049
+ raise Error, "Column #{index + 1} doesn't match on name: #{ca.name || 'no name'}, #{cb.name || 'no name'}" unless ca.name == cb.name
1046
1050
  elsif @options[:validate] || !ta.empty? && !tb.empty?
1047
1051
  # If validating, column compatibility requires strict match between titles
1048
1052
  titles_match = case
@@ -1066,10 +1070,10 @@ module RDF::Tabular
1066
1070
  true
1067
1071
  elsif !@options[:validate]
1068
1072
  # If not validating, columns don't match, but processing continues
1069
- log_warn "Columns don't match on titles: #{ca.titles.inspect} vs #{cb.titles.inspect}"
1073
+ log_warn "Column #{index + 1} doesn't match on titles: #{Array(ta['und']).join(',').inspect} vs #{Array(tb['und']).join(',').inspect}"
1070
1074
  true
1071
1075
  else
1072
- raise Error, "Columns don't match on titles: #{ca.titles.inspect} vs #{cb.titles.inspect}"
1076
+ raise Error, "Column #{index + 1} doesn't match on titles: #{Array(ta['und']).join(',').inspect} vs #{Array(tb['und']).join(',').inspect}"
1073
1077
  end
1074
1078
  end
1075
1079
  index += 1
@@ -1235,13 +1239,13 @@ module RDF::Tabular
1235
1239
  end
1236
1240
 
1237
1241
  # General setter for array properties
1238
- def set_array_value(key, value, klass, options={})
1242
+ def set_array_value(key, value, klass, **options)
1239
1243
  object[key] = case value
1240
1244
  when Array
1241
1245
  value.map do |v|
1242
1246
  case v
1243
1247
  when Hash
1244
- klass.new(v, @options.merge(options).merge(parent: self, context: nil))
1248
+ klass.new(v, **@options.merge(options).merge(parent: self, context: nil))
1245
1249
  else v
1246
1250
  end
1247
1251
  end
@@ -1282,11 +1286,11 @@ module RDF::Tabular
1282
1286
  class DebugContext
1283
1287
  include RDF::Util::Logger
1284
1288
  end
1285
- def self.log_debug(*args, &block)
1286
- DebugContext.new.log_debug(*args, &block)
1289
+ def self.log_debug(*args, **options, &block)
1290
+ DebugContext.new.log_debug(*args, **options, &block)
1287
1291
  end
1288
- def self.log_warn(*args)
1289
- DebugContext.new.log_warn(*args)
1292
+ def self.log_warn(*args, **options)
1293
+ DebugContext.new.log_warn(*args, **options)
1290
1294
  end
1291
1295
  end
1292
1296
 
@@ -1434,7 +1438,7 @@ module RDF::Tabular
1434
1438
  content['@context'] = object.delete(:@context) if object[:@context]
1435
1439
  ctx = @context
1436
1440
  remove_instance_variable(:@context) if instance_variables.include?(:@context)
1437
- tg = TableGroup.new(content, @options.merge(context: ctx, filenames: @filenames, base: base))
1441
+ tg = TableGroup.new(content, **@options.merge(context: ctx, filenames: @filenames, base: base))
1438
1442
  @parent = tg # Link from parent
1439
1443
  tg
1440
1444
  end
@@ -1489,7 +1493,7 @@ module RDF::Tabular
1489
1493
  number += 1
1490
1494
  case v
1491
1495
  when Hash
1492
- Column.new(v, @options.merge(
1496
+ Column.new(v, **@options.merge(
1493
1497
  table: (parent if parent.is_a?(Table)),
1494
1498
  parent: self,
1495
1499
  context: nil,
@@ -1621,8 +1625,8 @@ module RDF::Tabular
1621
1625
  def name
1622
1626
  self[:name] || if titles && (ts = titles[context.default_language || 'und'] || titles[self.lang || 'und'])
1623
1627
  n = Array(ts).first
1624
- n0 = URI.encode(n[0,1], /[^a-zA-Z0-9]/).encode("utf-8")
1625
- n1 = URI.encode(n[1..-1], /[^\w\.]/).encode("utf-8")
1628
+ n0 = RDF::URI.encode(n[0,1], /[^a-zA-Z0-9]/).encode("utf-8")
1629
+ n1 = RDF::URI.encode(n[1..-1], /[^\w\.]/).encode("utf-8")
1626
1630
  "#{n0}#{n1}"
1627
1631
  end || "_col.#{number}"
1628
1632
  end
@@ -1783,12 +1787,12 @@ module RDF::Tabular
1783
1787
  # @option options [String] :lang, language to set in table, if any
1784
1788
  # @return [Metadata] Tabular metadata
1785
1789
  # @see http://w3c.github.io/csvw/syntax/#parsing
1786
- def embedded_metadata(input, metadata, options = {})
1790
+ def embedded_metadata(input, metadata, **options)
1787
1791
  options = options.dup
1788
1792
  options.delete(:context) # Don't accidentally use a passed context
1789
1793
  # Normalize input to an IO object
1790
1794
  if input.is_a?(String)
1791
- return ::RDF::Util::File.open_file(input) {|f| embedded_metadata(f, metadata, options.merge(base: input.to_s))}
1795
+ return ::RDF::Util::File.open_file(input) {|f| embedded_metadata(f, metadata, **options.merge(base: input.to_s))}
1792
1796
  end
1793
1797
 
1794
1798
  table = {
@@ -1826,7 +1830,7 @@ module RDF::Tabular
1826
1830
  row.xpath('th').map(&:content).each_with_index do |value, index|
1827
1831
  # Skip columns
1828
1832
  skipCols = skipColumns.to_i
1829
- next if index < skipCols
1833
+ next if index < skipCols || value.to_s.empty?
1830
1834
 
1831
1835
  # Trim value
1832
1836
  value.lstrip! if %w(true start).include?(trim.to_s)
@@ -1837,11 +1841,11 @@ module RDF::Tabular
1837
1841
  column = columns[index - skipCols] ||= {
1838
1842
  "titles" => {lang => []},
1839
1843
  }
1840
- column["titles"][lang] << value
1844
+ column["titles"][lang] << value if value
1841
1845
  end
1842
1846
  end
1843
1847
  else
1844
- csv = ::CSV.new(input, csv_options)
1848
+ csv = ::CSV.new(input, **csv_options)
1845
1849
  (1..skipRows.to_i).each do
1846
1850
  value = csv.shift.join(delimiter) # Skip initial lines, these form comment annotations
1847
1851
  # Trim value
@@ -1858,7 +1862,7 @@ module RDF::Tabular
1858
1862
  Array(row_data).each_with_index do |value, index|
1859
1863
  # Skip columns
1860
1864
  skipCols = skipColumns.to_i
1861
- next if index < skipCols
1865
+ next if index < skipCols || value.to_s.empty?
1862
1866
 
1863
1867
  # Trim value
1864
1868
  value.lstrip! if %w(true start).include?(trim.to_s)
@@ -1876,7 +1880,7 @@ module RDF::Tabular
1876
1880
  log_debug("embedded_metadata") {"table: #{table.inspect}"}
1877
1881
  input.rewind if input.respond_to?(:rewind)
1878
1882
 
1879
- Table.new(table, options.merge(reason: "load embedded metadata: #{table['@id']}"))
1883
+ Table.new(table, **options.merge(reason: "load embedded metadata: #{table['@id']}"))
1880
1884
  end
1881
1885
  end
1882
1886
 
@@ -1947,13 +1951,14 @@ module RDF::Tabular
1947
1951
  class Row
1948
1952
  # Class for returning values
1949
1953
  Cell = Struct.new(:table, :column, :row, :stringValue, :aboutUrl, :propertyUrl, :valueUrl, :value, :errors) do
1950
- def set_urls(mapped_values)
1954
+ def set_urls(mapped_values, decode_uri)
1951
1955
  %w(aboutUrl propertyUrl valueUrl).each do |prop|
1952
1956
  # If the cell value is nil, and it is not a virtual column
1953
1957
  next if prop == "valueUrl" && value.nil? && !column.virtual
1954
1958
  if v = column.send(prop.to_sym)
1955
1959
  t = Addressable::Template.new(v)
1956
1960
  mapped = t.expand(mapped_values).to_s
1961
+ mapped = RDF::URI.decode(mapped) if decode_uri
1957
1962
  # FIXME: don't expand here, do it in CSV2RDF
1958
1963
  url = row.context.expand_iri(mapped, documentRelative: true)
1959
1964
  self.send("#{prop}=".to_sym, url)
@@ -2026,7 +2031,7 @@ module RDF::Tabular
2026
2031
  # @param [Hash{Symbol => Object}] options ({})
2027
2032
  # @option options [Boolean] :validate check for PK/FK consistency
2028
2033
  # @return [Row]
2029
- def initialize(row, metadata, number, source_number, options = {})
2034
+ def initialize(row, metadata, number, source_number, **options)
2030
2035
  @table = metadata
2031
2036
  @number = number
2032
2037
  @sourceNumber = source_number
@@ -2058,13 +2063,13 @@ module RDF::Tabular
2058
2063
 
2059
2064
  # create column if necessary
2060
2065
  columns[index - skipColumns] ||=
2061
- Column.new({}, options.merge(table: metadata, parent: metadata.tableSchema, number: index + 1 - skipColumns))
2066
+ Column.new({}, **options.merge(table: metadata, parent: metadata.tableSchema, number: index + 1 - skipColumns))
2062
2067
 
2063
2068
  column = columns[index - skipColumns]
2064
2069
 
2065
2070
  @values << cell = Cell.new(metadata, column, self, value)
2066
2071
 
2067
- datatype = column.datatype || Datatype.new({base: "string"}, options.merge(parent: column))
2072
+ datatype = column.datatype || Datatype.new({base: "string"}, **options.merge(parent: column))
2068
2073
  value = value.gsub(/\r\n\t/, ' ') unless %w(string json xml html anyAtomicType).include?(datatype.base)
2069
2074
  value = value.strip.gsub(/\s+/, ' ') unless %w(string json xml html anyAtomicType normalizedString).include?(datatype.base)
2070
2075
  # if the resulting string is an empty string, apply the remaining steps to the string given by the default property
@@ -2110,11 +2115,11 @@ module RDF::Tabular
2110
2115
  # Map URLs for row
2111
2116
  @values.each_with_index do |cell, index|
2112
2117
  mapped_values = map_values.merge(
2113
- "_name" => URI.decode(cell.column.name),
2118
+ "_name" => CGI.unescape(cell.column.name),
2114
2119
  "_column" => cell.column.number,
2115
2120
  "_sourceColumn" => cell.column.sourceNumber
2116
2121
  )
2117
- cell.set_urls(mapped_values)
2122
+ cell.set_urls(mapped_values, options[:decode_uri])
2118
2123
  end
2119
2124
  end
2120
2125