rdf-microdata 2.2.3 → 3.1.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
- SHA1:
3
- metadata.gz: 9abfe11678a3fda6c4ea8e22cce8bc2aa771e2ce
4
- data.tar.gz: 613faa6568d25156199440e06084d64d07f2de97
2
+ SHA256:
3
+ metadata.gz: 6b5a076a574bb55589987b966068adb4e7df76b6a3e4dfae4b928dd5ea7419a9
4
+ data.tar.gz: 6b8a1a6992422c0cf86c33d4794f89ca69c1dc4b65a928802f467a7c71bf3b3f
5
5
  SHA512:
6
- metadata.gz: 374b180bb0cab67a5fd333431e8b4c5aada1e46571c69e00fa6cf87415fcba7240196627524b0067e367f51a4859cab7d769736f520e7d8aa6c0386b2107d6ca
7
- data.tar.gz: 892c19c620622366dcadbb1757195f619fee2829915e26d19cd407cfd3592bd66a3b6b614c3b648b44796271f74d35dd3257123f05c3e4e401baf7d1988afb06
6
+ metadata.gz: 4036378c2e7f63285d3ae201e3c8972e0a2d9a006287d81d42b4f97aac8cb3de62345c9c13d5d773e1a2cf9cfbc10aed398806b9b45c11c4c15de7ebe036e5ad
7
+ data.tar.gz: 82106c791e7770e85c7779726575777ac7fe017640673d8ea14ef91937ea953a07ac8fdbcffeefe3d7188b68abc428967eeeb627edf5467a43ac3647e89bfa0f
data/README.md CHANGED
@@ -45,11 +45,11 @@ GRDDL-type triple generation, such as for html>head>title anchor tags.
45
45
  If the `RDFa` parser is available, {RDF::Microdata::Format} will not assert content type `text/html` or file extension `.html`, as this is also asserted by RDFa. Instead, the RDFa reader will invoke the microdata reader if an `@itemscope` attribute is detected.
46
46
 
47
47
  ## Dependencies
48
- * [RDF.rb](http://rubygems.org/gems/rdf) (>= 2.0)
49
- * [RDF::XSD](http://rubygems.org/gems/rdf-xsd) (>= 2.0)
48
+ * [RDF.rb](http://rubygems.org/gems/rdf) (>= 3.1)
49
+ * [RDF::XSD](http://rubygems.org/gems/rdf-xsd) (>= 3.1)
50
50
  * [HTMLEntities](https://rubygems.org/gems/htmlentities) ('>= 4.3.0')
51
- * [Nokogiri](http://rubygems.org/gems/nokogiri) (>= 1.7.1)
52
- * Soft dependency on [Nokogumbo](https://github.com/rubys/nokogumbo) (>= 1.4.10)
51
+ * [Nokogiri](http://rubygems.org/gems/nokogiri) (>= 1.10)
52
+ * Soft dependency on [Nokogumbo](https://github.com/rubys/nokogumbo) (~> 2.0)
53
53
 
54
54
  ## Documentation
55
55
  Full documentation available on [Rubydoc.info][Microdata doc]
@@ -68,13 +68,6 @@ use {RDF::Microdata::RdfaReader} directly.
68
68
 
69
69
  The reader exposes a `#rdfa` method, which can be used to retrieve the transformed HTML+RDFa
70
70
 
71
- ### JSON-lD-based Reader
72
- There is an experimental reader based on transforming Microdata to JSON-LD. To invoke
73
- this, add the `jsonld: true` option to the {RDF::Microdata::Reader.new}, or
74
- use {RDF::Microdata::JsonLdReader} directly.
75
-
76
- The reader exposes a `#json` method, which can be used to retrieve the generated JSON-LD
77
-
78
71
  ## Resources
79
72
  * [RDF.rb][RDF.rb]
80
73
  * [Documentation](http://www.rubydoc.info/github/ruby-rdf/rdf-microdata/)
data/VERSION CHANGED
@@ -1 +1 @@
1
- 2.2.3
1
+ 3.1.0
@@ -26,7 +26,6 @@ module RDF
26
26
  require 'rdf/microdata/format'
27
27
  require 'rdf/microdata/vocab'
28
28
  autoload :Expansion, 'rdf/microdata/expansion'
29
- autoload :JsonLdReader, 'rdf/microdata/jsonld_reader'
30
29
  autoload :Profile, 'rdf/microdata/profile'
31
30
  autoload :RdfaReader, 'rdf/microdata/rdfa_reader'
32
31
  autoload :Reader, 'rdf/microdata/reader'
@@ -26,7 +26,6 @@ module RDF::Microdata
26
26
  repo = RDF::Repository.new
27
27
  repo << self # Add default graph
28
28
 
29
- count = repo.count
30
29
  log_debug("expand") {"Loaded #{repo.size} triples into default graph"}
31
30
 
32
31
  repo = owl_entailment(repo)
@@ -38,7 +37,7 @@ module RDF::Microdata
38
37
  end
39
38
 
40
39
  def rule(name, &block)
41
- Rule.new(name, @options, block)
40
+ Rule.new(name, **@options, &block)
42
41
  end
43
42
 
44
43
  ##
@@ -72,7 +71,7 @@ module RDF::Microdata
72
71
  # r.execute(queryable) {|statement| puts statement.inspect}
73
72
  #
74
73
  # @param [String] name
75
- def initialize(name, options = {}, &block)
74
+ def initialize(name, **options, &block)
76
75
  @antecedents = []
77
76
  @consequents = []
78
77
  @options = options.dup
@@ -55,7 +55,7 @@ module RDF::Microdata
55
55
  format: :microdata
56
56
  },
57
57
  option_use: {output_format: :disabled},
58
- lambda: ->(files, options) do
58
+ lambda: ->(files, **options) do
59
59
  out = options[:output] || $stdout
60
60
  xsl = Nokogiri::XSLT(%(<xsl:stylesheet version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
61
61
  <xsl:param name="indent-increment" select="' '"/>
@@ -107,7 +107,7 @@ module RDF::Microdata
107
107
  # If files are empty, either use options[::evaluate]
108
108
  input = options[:evaluate] ? StringIO.new(options[:evaluate]) : STDIN
109
109
  input.set_encoding(options.fetch(:encoding, Encoding::UTF_8))
110
- RDF::Microdata::Reader.new(input, options.merge(rdfa: true)) do |reader|
110
+ RDF::Microdata::Reader.new(input, **options.merge(rdfa: true)) do |reader|
111
111
  reader.rdfa.xpath("//text()").each do |txt|
112
112
  txt.content = txt.content.to_s.strip
113
113
  end
@@ -115,7 +115,7 @@ module RDF::Microdata
115
115
  end
116
116
  else
117
117
  files.each do |file|
118
- RDF::Microdata::Reader.open(file, options.merge(rdfa: true)) do |reader|
118
+ RDF::Microdata::Reader.open(file, **options.merge(rdfa: true)) do |reader|
119
119
  reader.rdfa.xpath("//text()").each do |txt|
120
120
  txt.content = txt.content.to_s.strip
121
121
  end
@@ -125,32 +125,6 @@ module RDF::Microdata
125
125
  end
126
126
  end
127
127
  },
128
- "to-jsonld": {
129
- description: "Transform HTML+Microdata into JSON-LD",
130
- parse: false,
131
- help: "to-jsonld files ...\nTransform HTML+Microdata into JSON-LD",
132
- filter: {
133
- format: :microdata
134
- },
135
- option_use: {output_format: :disabled},
136
- lambda: ->(files, options) do
137
- out = options[:output] || $stdout
138
- if files.empty?
139
- # If files are empty, either use options[::evaluate]
140
- input = options[:evaluate] ? StringIO.new(options[:evaluate]) : STDIN
141
- input.set_encoding(options.fetch(:encoding, Encoding::UTF_8))
142
- RDF::Microdata::Reader.new(input, options.merge(jsonld: true)) do |reader|
143
- out.puts reader.jsonld.to_json(::JSON::LD::JSON_STATE)
144
- end
145
- else
146
- files.each do |file|
147
- RDF::Microdata::Reader.open(file, options.merge(jsonld: true)) do |reader|
148
- out.puts reader.jsonld.to_json(::JSON::LD::JSON_STATE)
149
- end
150
- end
151
- end
152
- end
153
- },
154
128
  }
155
129
  end
156
130
  end
@@ -29,7 +29,7 @@ module RDF::Microdata
29
29
  # @yieldparam [RDF::Reader] reader
30
30
  # @yieldreturn [void] ignored
31
31
  # @raise [RDF::ReaderError] if _validate_
32
- def initialize(input = $stdin, options = {}, &block)
32
+ def initialize(input = $stdin, **options, &block)
33
33
  @options = options
34
34
  log_debug('', "using RDFa transformation reader")
35
35
 
@@ -46,15 +46,6 @@ module RDF::Microdata
46
46
  ::Nokogiri::HTML5(input.force_encoding(options[:encoding]))
47
47
  end
48
48
 
49
- # Load registry
50
- begin
51
- registry_uri = options[:registry] || RDF::Microdata::DEFAULT_REGISTRY
52
- log_debug('', "registry = #{registry_uri.inspect}")
53
- Registry.load_registry(registry_uri)
54
- rescue JSON::ParserError => e
55
- log_fatal("Failed to parse registry: #{e.message}", exception: RDF::ReaderError) if (root.nil? && validate?)
56
- end
57
-
58
49
  # For all members having @itemscope
59
50
  input.css("[itemscope]").each do |item|
60
51
  # Get @itemtypes to create @type and @vocab
@@ -69,8 +60,8 @@ module RDF::Microdata
69
60
 
70
61
  item['typeof'] = types.join(' ') unless types.empty?
71
62
  if vocab = types.first
72
- vocab = Registry.find(vocab) || begin
73
- type_vocab = vocab.to_s.sub(/([\/\#])[^\/\#]*$/, '\1') unless vocab.nil?
63
+ vocab = begin
64
+ type_vocab = vocab.to_s.sub(/([\/\#])[^\/\#]*$/, '\1')
74
65
  Registry.new(type_vocab) if type_vocab
75
66
  end
76
67
  item['vocab'] = vocab.uri.to_s if vocab
@@ -124,7 +115,7 @@ module RDF::Microdata
124
115
  version: :"rdfa1.1")
125
116
 
126
117
  # Rely on RDFa reader
127
- super(input, options, &block)
118
+ super(input, **options, &block)
128
119
  end
129
120
  end
130
121
  end
@@ -54,7 +54,7 @@ module RDF::Microdata
54
54
  # Redirect for RDFa Reader given `:rdfa` option
55
55
  #
56
56
  # @private
57
- def self.new(input = nil, options = {}, &block)
57
+ def self.new(input = nil, **options, &block)
58
58
  klass = if options[:rdfa]
59
59
  # Requires rdf-rdfa gem to be loaded
60
60
  begin
@@ -63,19 +63,11 @@ module RDF::Microdata
63
63
  raise ReaderError, "Use of RDFa-based reader requires rdf-rdfa gem"
64
64
  end
65
65
  RdfaReader
66
- elsif options[:jsonld]
67
- # Requires rdf-rdfa gem to be loaded
68
- begin
69
- require 'json/ld'
70
- rescue LoadError
71
- raise ReaderError, "Use of JSON-LD-based reader requires json-ld gem"
72
- end
73
- JsonLdReader
74
66
  else
75
67
  self
76
68
  end
77
69
  reader = klass.allocate
78
- reader.send(:initialize, input, options, &block)
70
+ reader.send(:initialize, input, **options, &block)
79
71
  reader
80
72
  end
81
73
 
@@ -102,7 +94,7 @@ module RDF::Microdata
102
94
  # @yieldparam [RDF::Reader] reader
103
95
  # @yieldreturn [void] ignored
104
96
  # @raise [Error] Raises `RDF::ReaderError` when validating
105
- def initialize(input = $stdin, options = {}, &block)
97
+ def initialize(input = $stdin, **options, &block)
106
98
  super do
107
99
  @library = :nokogiri
108
100
 
@@ -111,7 +103,7 @@ module RDF::Microdata
111
103
  self.extend(@implementation)
112
104
 
113
105
  input.rewind if input.respond_to?(:rewind)
114
- initialize_html(input, options) rescue log_fatal($!.message, exception: RDF::ReaderError)
106
+ initialize_html(input, **options) rescue log_fatal($!.message, exception: RDF::ReaderError)
115
107
 
116
108
  log_error("Empty document") if root.nil?
117
109
  log_error(doc_errors.map(&:message).uniq.join("\n")) if !doc_errors.empty?
@@ -178,7 +178,7 @@ module RDF::Microdata
178
178
  #
179
179
  # @param [Hash{Symbol => Object}] options
180
180
  # @return [void]
181
- def initialize_html(input, options = {})
181
+ def initialize_html(input, **options)
182
182
  require 'nokogiri' unless defined?(::Nokogiri)
183
183
  @doc = case input
184
184
  when ::Nokogiri::XML::Document
@@ -194,7 +194,7 @@ module RDF::Microdata
194
194
  begin
195
195
  require 'nokogumbo' unless defined?(::Nokogumbo)
196
196
  input = input.read if input.respond_to?(:read)
197
- ::Nokogiri::HTML5(input.force_encoding(options[:encoding]))
197
+ ::Nokogiri::HTML5(input.force_encoding(options[:encoding]), max_parse_errors: 1000)
198
198
  rescue LoadError
199
199
  ::Nokogiri::HTML.parse(input, base_uri.to_s, options[:encoding])
200
200
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rdf-microdata
3
3
  version: !ruby/object:Gem::Version
4
- version: 2.2.3
4
+ version: 3.1.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Gregg
@@ -9,48 +9,36 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2017-12-13 00:00:00.000000000 Z
12
+ date: 2019-12-13 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: rdf
16
16
  requirement: !ruby/object:Gem::Requirement
17
17
  requirements:
18
- - - ">="
19
- - !ruby/object:Gem::Version
20
- version: 2.2.8
21
- - - "<"
18
+ - - "~>"
22
19
  - !ruby/object:Gem::Version
23
- version: '4.0'
20
+ version: '3.1'
24
21
  type: :runtime
25
22
  prerelease: false
26
23
  version_requirements: !ruby/object:Gem::Requirement
27
24
  requirements:
28
- - - ">="
29
- - !ruby/object:Gem::Version
30
- version: 2.2.8
31
- - - "<"
25
+ - - "~>"
32
26
  - !ruby/object:Gem::Version
33
- version: '4.0'
27
+ version: '3.1'
34
28
  - !ruby/object:Gem::Dependency
35
29
  name: rdf-xsd
36
30
  requirement: !ruby/object:Gem::Requirement
37
31
  requirements:
38
- - - ">="
39
- - !ruby/object:Gem::Version
40
- version: '2.2'
41
- - - "<"
32
+ - - "~>"
42
33
  - !ruby/object:Gem::Version
43
- version: '4.0'
34
+ version: '3.1'
44
35
  type: :runtime
45
36
  prerelease: false
46
37
  version_requirements: !ruby/object:Gem::Requirement
47
38
  requirements:
48
- - - ">="
49
- - !ruby/object:Gem::Version
50
- version: '2.2'
51
- - - "<"
39
+ - - "~>"
52
40
  - !ruby/object:Gem::Version
53
- version: '4.0'
41
+ version: '3.1'
54
42
  - !ruby/object:Gem::Dependency
55
43
  name: htmlentities
56
44
  requirement: !ruby/object:Gem::Requirement
@@ -71,14 +59,14 @@ dependencies:
71
59
  requirements:
72
60
  - - "~>"
73
61
  - !ruby/object:Gem::Version
74
- version: '1.8'
62
+ version: '1.10'
75
63
  type: :runtime
76
64
  prerelease: false
77
65
  version_requirements: !ruby/object:Gem::Requirement
78
66
  requirements:
79
67
  - - "~>"
80
68
  - !ruby/object:Gem::Version
81
- version: '1.8'
69
+ version: '1.10'
82
70
  - !ruby/object:Gem::Dependency
83
71
  name: equivalent-xml
84
72
  requirement: !ruby/object:Gem::Requirement
@@ -99,142 +87,112 @@ dependencies:
99
87
  requirements:
100
88
  - - "~>"
101
89
  - !ruby/object:Gem::Version
102
- version: '0.9'
90
+ version: 0.9.20
103
91
  type: :development
104
92
  prerelease: false
105
93
  version_requirements: !ruby/object:Gem::Requirement
106
94
  requirements:
107
95
  - - "~>"
108
96
  - !ruby/object:Gem::Version
109
- version: '0.9'
97
+ version: 0.9.20
110
98
  - !ruby/object:Gem::Dependency
111
99
  name: rspec
112
100
  requirement: !ruby/object:Gem::Requirement
113
101
  requirements:
114
102
  - - "~>"
115
103
  - !ruby/object:Gem::Version
116
- version: '3.6'
104
+ version: '3.9'
117
105
  type: :development
118
106
  prerelease: false
119
107
  version_requirements: !ruby/object:Gem::Requirement
120
108
  requirements:
121
109
  - - "~>"
122
110
  - !ruby/object:Gem::Version
123
- version: '3.6'
111
+ version: '3.9'
124
112
  - !ruby/object:Gem::Dependency
125
113
  name: rspec-its
126
114
  requirement: !ruby/object:Gem::Requirement
127
115
  requirements:
128
116
  - - "~>"
129
117
  - !ruby/object:Gem::Version
130
- version: '1.2'
118
+ version: '1.3'
131
119
  type: :development
132
120
  prerelease: false
133
121
  version_requirements: !ruby/object:Gem::Requirement
134
122
  requirements:
135
123
  - - "~>"
136
124
  - !ruby/object:Gem::Version
137
- version: '1.2'
125
+ version: '1.3'
138
126
  - !ruby/object:Gem::Dependency
139
- name: json-ld
127
+ name: rdf-spec
140
128
  requirement: !ruby/object:Gem::Requirement
141
129
  requirements:
142
- - - ">="
143
- - !ruby/object:Gem::Version
144
- version: '2.1'
145
- - - "<"
130
+ - - "~>"
146
131
  - !ruby/object:Gem::Version
147
- version: '4.0'
132
+ version: '3.1'
148
133
  type: :development
149
134
  prerelease: false
150
135
  version_requirements: !ruby/object:Gem::Requirement
151
136
  requirements:
152
- - - ">="
153
- - !ruby/object:Gem::Version
154
- version: '2.1'
155
- - - "<"
137
+ - - "~>"
156
138
  - !ruby/object:Gem::Version
157
- version: '4.0'
139
+ version: '3.1'
158
140
  - !ruby/object:Gem::Dependency
159
- name: rdf-spec
141
+ name: rdf-turtle
160
142
  requirement: !ruby/object:Gem::Requirement
161
143
  requirements:
162
144
  - - ">="
163
145
  - !ruby/object:Gem::Version
164
- version: '2.2'
165
- - - "<"
166
- - !ruby/object:Gem::Version
167
- version: '4.0'
146
+ version: '3.1'
168
147
  type: :development
169
148
  prerelease: false
170
149
  version_requirements: !ruby/object:Gem::Requirement
171
150
  requirements:
172
151
  - - ">="
173
152
  - !ruby/object:Gem::Version
174
- version: '2.2'
175
- - - "<"
176
- - !ruby/object:Gem::Version
177
- version: '4.0'
153
+ version: '3.1'
178
154
  - !ruby/object:Gem::Dependency
179
- name: rdf-rdfa
155
+ name: rdf-isomorphic
180
156
  requirement: !ruby/object:Gem::Requirement
181
157
  requirements:
182
- - - ">="
183
- - !ruby/object:Gem::Version
184
- version: '2.2'
185
- - - "<"
158
+ - - "~>"
186
159
  - !ruby/object:Gem::Version
187
- version: '4.0'
160
+ version: '3.1'
188
161
  type: :development
189
162
  prerelease: false
190
163
  version_requirements: !ruby/object:Gem::Requirement
191
164
  requirements:
192
- - - ">="
193
- - !ruby/object:Gem::Version
194
- version: '2.2'
195
- - - "<"
165
+ - - "~>"
196
166
  - !ruby/object:Gem::Version
197
- version: '4.0'
167
+ version: '3.1'
198
168
  - !ruby/object:Gem::Dependency
199
- name: rdf-turtle
169
+ name: json-ld
200
170
  requirement: !ruby/object:Gem::Requirement
201
171
  requirements:
202
- - - ">="
203
- - !ruby/object:Gem::Version
204
- version: '2.2'
205
- - - "<"
172
+ - - "~>"
206
173
  - !ruby/object:Gem::Version
207
- version: '4.0'
174
+ version: '3.1'
208
175
  type: :development
209
176
  prerelease: false
210
177
  version_requirements: !ruby/object:Gem::Requirement
211
178
  requirements:
212
- - - ">="
213
- - !ruby/object:Gem::Version
214
- version: '2.2'
215
- - - "<"
179
+ - - "~>"
216
180
  - !ruby/object:Gem::Version
217
- version: '4.0'
181
+ version: '3.1'
218
182
  - !ruby/object:Gem::Dependency
219
- name: rdf-isomorphic
183
+ name: rdf-rdfa
220
184
  requirement: !ruby/object:Gem::Requirement
221
185
  requirements:
222
- - - ">="
223
- - !ruby/object:Gem::Version
224
- version: '2.2'
225
- - - "<"
186
+ - - "~>"
226
187
  - !ruby/object:Gem::Version
227
- version: '4.0'
188
+ version: '3.1'
228
189
  type: :development
229
190
  prerelease: false
230
191
  version_requirements: !ruby/object:Gem::Requirement
231
192
  requirements:
232
- - - ">="
233
- - !ruby/object:Gem::Version
234
- version: '2.2'
235
- - - "<"
193
+ - - "~>"
236
194
  - !ruby/object:Gem::Version
237
- version: '4.0'
195
+ version: '3.1'
238
196
  description: Reads HTML Microdata as RDF.
239
197
  email: public-rdf-ruby@w3.org
240
198
  executables: []
@@ -252,7 +210,6 @@ files:
252
210
  - lib/rdf/microdata.rb
253
211
  - lib/rdf/microdata/expansion.rb
254
212
  - lib/rdf/microdata/format.rb
255
- - lib/rdf/microdata/jsonld_reader.rb
256
213
  - lib/rdf/microdata/rdfa_reader.rb
257
214
  - lib/rdf/microdata/reader.rb
258
215
  - lib/rdf/microdata/reader/nokogiri.rb
@@ -271,15 +228,14 @@ required_ruby_version: !ruby/object:Gem::Requirement
271
228
  requirements:
272
229
  - - ">="
273
230
  - !ruby/object:Gem::Version
274
- version: 2.2.2
231
+ version: '2.4'
275
232
  required_rubygems_version: !ruby/object:Gem::Requirement
276
233
  requirements:
277
234
  - - ">="
278
235
  - !ruby/object:Gem::Version
279
236
  version: '0'
280
237
  requirements: []
281
- rubyforge_project:
282
- rubygems_version: 2.6.14
238
+ rubygems_version: 3.0.6
283
239
  signing_key:
284
240
  specification_version: 4
285
241
  summary: Microdata reader for Ruby.
@@ -1,251 +0,0 @@
1
- require 'json/ld'
2
- require 'nokogumbo'
3
-
4
- module RDF::Microdata
5
- ##
6
- # Update DOM to turn Microdata into JSON-LD and parse using the JSON-LD Reader
7
- class JsonLdReader < JSON::LD::Reader
8
- # The resulting JSON-LD
9
- # @return [Hash]
10
- attr_reader :jsonld
11
-
12
- def self.format(klass = nil)
13
- if klass.nil?
14
- RDF::Microdata::Format
15
- else
16
- super
17
- end
18
- end
19
-
20
- ##
21
- # Initializes the JsonLdReader instance.
22
- #
23
- # @param [IO, File, String] input
24
- # the input stream to read
25
- # @param [Hash{Symbol => Object}] options
26
- # any additional options (see `RDF::Reader#initialize`)
27
- # @return [reader]
28
- # @yield [reader] `self`
29
- # @yieldparam [RDF::Reader] reader
30
- # @yieldreturn [void] ignored
31
- # @raise [RDF::ReaderError] if _validate_
32
- def initialize(input = $stdin, options = {}, &block)
33
- @options = options
34
- log_info('', "using JSON-LD transformation reader")
35
-
36
- input = case input
37
- when ::Nokogiri::XML::Document, ::Nokogiri::HTML::Document then input
38
- else
39
- # Try to detect charset from input
40
- options[:encoding] ||= input.charset if input.respond_to?(:charset)
41
-
42
- # Otherwise, default is utf-8
43
- options[:encoding] ||= 'utf-8'
44
- options[:encoding] = options[:encoding].to_s if options[:encoding]
45
- input = input.read if input.respond_to?(:read)
46
- ::Nokogiri::HTML5(input.force_encoding(options[:encoding]))
47
- end
48
-
49
- # Load registry
50
- begin
51
- registry_uri = options[:registry] || RDF::Microdata::DEFAULT_REGISTRY
52
- log_debug('', "registry = #{registry_uri.inspect}")
53
- Registry.load_registry(registry_uri)
54
- rescue JSON::ParserError => e
55
- log_fatal("Failed to parse registry: #{e.message}", exception: RDF::ReaderError) if (root.nil? && validate?)
56
- end
57
-
58
- @jsonld = {'@graph' => []}
59
-
60
- # Start with all top-level items
61
- input.css("[itemscope]").each do |item|
62
- next if item['itemprop'] # Only top-level items
63
- jsonld['@graph'] << get_object(item)
64
- end
65
-
66
- log_debug('', "Transformed document: #{jsonld.to_json(JSON::LD::JSON_STATE)}")
67
-
68
- # Rely on RDFa reader
69
- super(jsonld.to_json, options, &block)
70
- end
71
-
72
- private
73
- # Return JSON-LD representation of an item
74
- # @param [Nokogiri::XML::Element] item
75
- # @param [Hash{Nokogiri::XML::Node => Hash}]
76
- # @return [Hash]
77
- def get_object(item, memory = {})
78
- if result = memory[item]
79
- # Result is a reference to that item; assign a blank-node identifier if necessary
80
- result['@id'] ||= alloc_bnode
81
- return result
82
- end
83
-
84
- result = {}
85
- memory[item] = result
86
-
87
- # If the item has a global identifier, add an entry to result called "@id" whose value is the global identifier of item.
88
- result['@id'] = item['itemid'].to_s if item['itemid']
89
-
90
- # If the item has any item types, add an entry to result called "@type" whose value is an array listing the item types of item, in the order they were specified on the itemtype attribute.
91
- if item['itemtype']
92
- # Only absolute URLs
93
- types = item.attribute('itemtype').
94
- remove.
95
- to_s.
96
- split(/\s+/).
97
- select {|t| RDF::URI(t).absolute?}
98
- if vocab = types.first
99
- vocab = Registry.find(vocab) || begin
100
- type_vocab = vocab.to_s.sub(/([\/\#])[^\/\#]*$/, '\1') unless vocab.nil?
101
- Registry.new(type_vocab) if type_vocab
102
- end
103
- (result['@context'] = {})['@vocab'] = vocab.uri.to_s if vocab
104
- result['@type'] = types unless types.empty?
105
- end
106
- end
107
-
108
- # For each element element that has one or more property names and is one of the properties of the item item, in the order those elements are given by the algorithm that returns the properties of an item, run the following substeps
109
- item_properties(item).each do |element|
110
- value = if element['itemscope']
111
- get_object(element, memory)
112
- else
113
- property_value(element)
114
- end
115
- element['itemprop'].to_s.split(/\s+/).each do |prop|
116
- result[prop] ||= [] << value
117
- end
118
- end
119
-
120
- result
121
- end
122
-
123
- ##
124
- #
125
- # @param [Nokogiri::XML::Element] item
126
- # @return [Array<Nokogiri::XML::Element>]
127
- # List of property elements for an item
128
- def item_properties(item)
129
- results, memory, pending = [], [item], item.children.select(&:element?)
130
- log_debug(item, "item_properties")
131
-
132
- # If root has an itemref attribute, split the value of that itemref attribute on spaces. For each resulting token ID, if there is an element in the document whose ID is ID, then add the first such element to pending.
133
- item['itemref'].to_s.split(/\s+/).each do |ref|
134
- if referenced = referenced = item.at_css("##{ref}")
135
- pending << referenced
136
- end
137
- end
138
-
139
- while !pending.empty?
140
- current = pending.shift
141
- # Error
142
- break if memory.include?(current)
143
- memory << current
144
-
145
- # If current does not have an itemscope attribute, then: add all the child elements of current to pending.
146
- pending += current.children.select(&:element?) unless current['itemscope']
147
-
148
- # If current has an itemprop attribute specified and has one or more property names, then add current to results.
149
- results << current unless current['itemprop'].to_s.split(/\s+/).empty?
150
- end
151
-
152
- results
153
- end
154
-
155
- ##
156
- #
157
- def property_value(element)
158
- base = element.base || base_uri
159
- log_debug(element) {"property_value(#{element.name}): base #{base.inspect}"}
160
- value = case
161
- when element.has_attribute?('itemscope')
162
- {}
163
- when element.has_attribute?('content')
164
- if element.language
165
- {"@value" => element['content'].to_s.strip, language: element.language}
166
- else
167
- element['content'].to_s.strip
168
- end
169
- when %w(data meter).include?(element.name) && element.attribute('value')
170
- # XXX parse as number?
171
- {"@value" => element['value'].to_s.strip}
172
- when %w(audio embed iframe img source track video).include?(element.name)
173
- {"@id" => uri(element.attribute('src'), base).to_s}
174
- when %w(a area link).include?(element.name)
175
- {"@id" => uri(element.attribute('href'), base).to_s}
176
- when %w(object).include?(element.name)
177
- {"@id" => uri(element.attribute('data'), base).to_s}
178
- when %w(time).include?(element.name)
179
- # use datatype?
180
- (element.attribute('datetime') || element.text).to_s.strip
181
- else
182
- if element.language
183
- {"@value" => element.inner_text.to_s.strip, language: element.language}
184
- else
185
- element.inner_text.to_s.strip
186
- end
187
- end
188
- log_debug(element) {" #{value.inspect}"}
189
- value
190
- end
191
-
192
- # Allocate a new blank node identifier
193
- # @return [String]
194
- def alloc_bnode
195
- @bnode_base ||= "_:a"
196
- res = @bnode_base
197
- @bnode_base = res.succ
198
- res
199
- end
200
-
201
- # Fixme, what about xml:base relative to element?
202
- def uri(value, base = nil)
203
- value = if base
204
- base = uri(base) unless base.is_a?(RDF::URI)
205
- base.join(value.to_s)
206
- else
207
- RDF::URI(value.to_s)
208
- end
209
- value.validate! if validate?
210
- value.canonicalize! if canonicalize?
211
- value = RDF::URI.intern(value) if intern?
212
- value
213
- end
214
- end
215
- end
216
-
217
- # Monkey Patch Nokogiri
218
- module Nokogiri::XML
219
- class Element
220
-
221
- ##
222
- # Get any xml:base in effect for this element
223
- def base
224
- if @base.nil?
225
- @base = attributes['xml:base'] ||
226
- (parent && parent.element? && parent.base) ||
227
- false
228
- end
229
-
230
- @base == false ? nil : @base
231
- end
232
-
233
-
234
- ##
235
- # Get any xml:lang or lang in effect for this element
236
- def language
237
- if @language.nil?
238
- language = case
239
- when self["xml:lang"]
240
- self["xml:lang"].to_s
241
- when self["lang"]
242
- self["lang"].to_s
243
- else
244
- parent && parent.element? && parent.language
245
- end
246
- end
247
- @language == false ? nil : @language
248
- end
249
-
250
- end
251
- end