rdf-microdata 2.2.3 → 3.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +5 -5
- data/README.md +4 -11
- data/VERSION +1 -1
- data/lib/rdf/microdata.rb +0 -1
- data/lib/rdf/microdata/expansion.rb +2 -3
- data/lib/rdf/microdata/format.rb +3 -29
- data/lib/rdf/microdata/rdfa_reader.rb +4 -13
- data/lib/rdf/microdata/reader.rb +4 -12
- data/lib/rdf/microdata/reader/nokogiri.rb +2 -2
- metadata +43 -87
- data/lib/rdf/microdata/jsonld_reader.rb +0 -251
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
|
-
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: 6b5a076a574bb55589987b966068adb4e7df76b6a3e4dfae4b928dd5ea7419a9
|
4
|
+
data.tar.gz: 6b8a1a6992422c0cf86c33d4794f89ca69c1dc4b65a928802f467a7c71bf3b3f
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 4036378c2e7f63285d3ae201e3c8972e0a2d9a006287d81d42b4f97aac8cb3de62345c9c13d5d773e1a2cf9cfbc10aed398806b9b45c11c4c15de7ebe036e5ad
|
7
|
+
data.tar.gz: 82106c791e7770e85c7779726575777ac7fe017640673d8ea14ef91937ea953a07ac8fdbcffeefe3d7188b68abc428967eeeb627edf5467a43ac3647e89bfa0f
|
data/README.md
CHANGED
@@ -45,11 +45,11 @@ GRDDL-type triple generation, such as for html>head>title anchor tags.
|
|
45
45
|
If the `RDFa` parser is available, {RDF::Microdata::Format} will not assert content type `text/html` or file extension `.html`, as this is also asserted by RDFa. Instead, the RDFa reader will invoke the microdata reader if an `@itemscope` attribute is detected.
|
46
46
|
|
47
47
|
## Dependencies
|
48
|
-
* [RDF.rb](http://rubygems.org/gems/rdf) (>=
|
49
|
-
* [RDF::XSD](http://rubygems.org/gems/rdf-xsd) (>=
|
48
|
+
* [RDF.rb](http://rubygems.org/gems/rdf) (>= 3.1)
|
49
|
+
* [RDF::XSD](http://rubygems.org/gems/rdf-xsd) (>= 3.1)
|
50
50
|
* [HTMLEntities](https://rubygems.org/gems/htmlentities) ('>= 4.3.0')
|
51
|
-
* [Nokogiri](http://rubygems.org/gems/nokogiri) (>= 1.
|
52
|
-
* Soft dependency on [Nokogumbo](https://github.com/rubys/nokogumbo) (
|
51
|
+
* [Nokogiri](http://rubygems.org/gems/nokogiri) (>= 1.10)
|
52
|
+
* Soft dependency on [Nokogumbo](https://github.com/rubys/nokogumbo) (~> 2.0)
|
53
53
|
|
54
54
|
## Documentation
|
55
55
|
Full documentation available on [Rubydoc.info][Microdata doc]
|
@@ -68,13 +68,6 @@ use {RDF::Microdata::RdfaReader} directly.
|
|
68
68
|
|
69
69
|
The reader exposes a `#rdfa` method, which can be used to retrieve the transformed HTML+RDFa
|
70
70
|
|
71
|
-
### JSON-lD-based Reader
|
72
|
-
There is an experimental reader based on transforming Microdata to JSON-LD. To invoke
|
73
|
-
this, add the `jsonld: true` option to the {RDF::Microdata::Reader.new}, or
|
74
|
-
use {RDF::Microdata::JsonLdReader} directly.
|
75
|
-
|
76
|
-
The reader exposes a `#json` method, which can be used to retrieve the generated JSON-LD
|
77
|
-
|
78
71
|
## Resources
|
79
72
|
* [RDF.rb][RDF.rb]
|
80
73
|
* [Documentation](http://www.rubydoc.info/github/ruby-rdf/rdf-microdata/)
|
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
|
1
|
+
3.1.0
|
data/lib/rdf/microdata.rb
CHANGED
@@ -26,7 +26,6 @@ module RDF
|
|
26
26
|
require 'rdf/microdata/format'
|
27
27
|
require 'rdf/microdata/vocab'
|
28
28
|
autoload :Expansion, 'rdf/microdata/expansion'
|
29
|
-
autoload :JsonLdReader, 'rdf/microdata/jsonld_reader'
|
30
29
|
autoload :Profile, 'rdf/microdata/profile'
|
31
30
|
autoload :RdfaReader, 'rdf/microdata/rdfa_reader'
|
32
31
|
autoload :Reader, 'rdf/microdata/reader'
|
@@ -26,7 +26,6 @@ module RDF::Microdata
|
|
26
26
|
repo = RDF::Repository.new
|
27
27
|
repo << self # Add default graph
|
28
28
|
|
29
|
-
count = repo.count
|
30
29
|
log_debug("expand") {"Loaded #{repo.size} triples into default graph"}
|
31
30
|
|
32
31
|
repo = owl_entailment(repo)
|
@@ -38,7 +37,7 @@ module RDF::Microdata
|
|
38
37
|
end
|
39
38
|
|
40
39
|
def rule(name, &block)
|
41
|
-
Rule.new(name,
|
40
|
+
Rule.new(name, **@options, &block)
|
42
41
|
end
|
43
42
|
|
44
43
|
##
|
@@ -72,7 +71,7 @@ module RDF::Microdata
|
|
72
71
|
# r.execute(queryable) {|statement| puts statement.inspect}
|
73
72
|
#
|
74
73
|
# @param [String] name
|
75
|
-
def initialize(name, options
|
74
|
+
def initialize(name, **options, &block)
|
76
75
|
@antecedents = []
|
77
76
|
@consequents = []
|
78
77
|
@options = options.dup
|
data/lib/rdf/microdata/format.rb
CHANGED
@@ -55,7 +55,7 @@ module RDF::Microdata
|
|
55
55
|
format: :microdata
|
56
56
|
},
|
57
57
|
option_use: {output_format: :disabled},
|
58
|
-
lambda: ->(files, options) do
|
58
|
+
lambda: ->(files, **options) do
|
59
59
|
out = options[:output] || $stdout
|
60
60
|
xsl = Nokogiri::XSLT(%(<xsl:stylesheet version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
|
61
61
|
<xsl:param name="indent-increment" select="' '"/>
|
@@ -107,7 +107,7 @@ module RDF::Microdata
|
|
107
107
|
# If files are empty, either use options[::evaluate]
|
108
108
|
input = options[:evaluate] ? StringIO.new(options[:evaluate]) : STDIN
|
109
109
|
input.set_encoding(options.fetch(:encoding, Encoding::UTF_8))
|
110
|
-
RDF::Microdata::Reader.new(input, options.merge(rdfa: true)) do |reader|
|
110
|
+
RDF::Microdata::Reader.new(input, **options.merge(rdfa: true)) do |reader|
|
111
111
|
reader.rdfa.xpath("//text()").each do |txt|
|
112
112
|
txt.content = txt.content.to_s.strip
|
113
113
|
end
|
@@ -115,7 +115,7 @@ module RDF::Microdata
|
|
115
115
|
end
|
116
116
|
else
|
117
117
|
files.each do |file|
|
118
|
-
RDF::Microdata::Reader.open(file, options.merge(rdfa: true)) do |reader|
|
118
|
+
RDF::Microdata::Reader.open(file, **options.merge(rdfa: true)) do |reader|
|
119
119
|
reader.rdfa.xpath("//text()").each do |txt|
|
120
120
|
txt.content = txt.content.to_s.strip
|
121
121
|
end
|
@@ -125,32 +125,6 @@ module RDF::Microdata
|
|
125
125
|
end
|
126
126
|
end
|
127
127
|
},
|
128
|
-
"to-jsonld": {
|
129
|
-
description: "Transform HTML+Microdata into JSON-LD",
|
130
|
-
parse: false,
|
131
|
-
help: "to-jsonld files ...\nTransform HTML+Microdata into JSON-LD",
|
132
|
-
filter: {
|
133
|
-
format: :microdata
|
134
|
-
},
|
135
|
-
option_use: {output_format: :disabled},
|
136
|
-
lambda: ->(files, options) do
|
137
|
-
out = options[:output] || $stdout
|
138
|
-
if files.empty?
|
139
|
-
# If files are empty, either use options[::evaluate]
|
140
|
-
input = options[:evaluate] ? StringIO.new(options[:evaluate]) : STDIN
|
141
|
-
input.set_encoding(options.fetch(:encoding, Encoding::UTF_8))
|
142
|
-
RDF::Microdata::Reader.new(input, options.merge(jsonld: true)) do |reader|
|
143
|
-
out.puts reader.jsonld.to_json(::JSON::LD::JSON_STATE)
|
144
|
-
end
|
145
|
-
else
|
146
|
-
files.each do |file|
|
147
|
-
RDF::Microdata::Reader.open(file, options.merge(jsonld: true)) do |reader|
|
148
|
-
out.puts reader.jsonld.to_json(::JSON::LD::JSON_STATE)
|
149
|
-
end
|
150
|
-
end
|
151
|
-
end
|
152
|
-
end
|
153
|
-
},
|
154
128
|
}
|
155
129
|
end
|
156
130
|
end
|
@@ -29,7 +29,7 @@ module RDF::Microdata
|
|
29
29
|
# @yieldparam [RDF::Reader] reader
|
30
30
|
# @yieldreturn [void] ignored
|
31
31
|
# @raise [RDF::ReaderError] if _validate_
|
32
|
-
def initialize(input = $stdin, options
|
32
|
+
def initialize(input = $stdin, **options, &block)
|
33
33
|
@options = options
|
34
34
|
log_debug('', "using RDFa transformation reader")
|
35
35
|
|
@@ -46,15 +46,6 @@ module RDF::Microdata
|
|
46
46
|
::Nokogiri::HTML5(input.force_encoding(options[:encoding]))
|
47
47
|
end
|
48
48
|
|
49
|
-
# Load registry
|
50
|
-
begin
|
51
|
-
registry_uri = options[:registry] || RDF::Microdata::DEFAULT_REGISTRY
|
52
|
-
log_debug('', "registry = #{registry_uri.inspect}")
|
53
|
-
Registry.load_registry(registry_uri)
|
54
|
-
rescue JSON::ParserError => e
|
55
|
-
log_fatal("Failed to parse registry: #{e.message}", exception: RDF::ReaderError) if (root.nil? && validate?)
|
56
|
-
end
|
57
|
-
|
58
49
|
# For all members having @itemscope
|
59
50
|
input.css("[itemscope]").each do |item|
|
60
51
|
# Get @itemtypes to create @type and @vocab
|
@@ -69,8 +60,8 @@ module RDF::Microdata
|
|
69
60
|
|
70
61
|
item['typeof'] = types.join(' ') unless types.empty?
|
71
62
|
if vocab = types.first
|
72
|
-
vocab =
|
73
|
-
type_vocab = vocab.to_s.sub(/([\/\#])[^\/\#]*$/, '\1')
|
63
|
+
vocab = begin
|
64
|
+
type_vocab = vocab.to_s.sub(/([\/\#])[^\/\#]*$/, '\1')
|
74
65
|
Registry.new(type_vocab) if type_vocab
|
75
66
|
end
|
76
67
|
item['vocab'] = vocab.uri.to_s if vocab
|
@@ -124,7 +115,7 @@ module RDF::Microdata
|
|
124
115
|
version: :"rdfa1.1")
|
125
116
|
|
126
117
|
# Rely on RDFa reader
|
127
|
-
super(input, options, &block)
|
118
|
+
super(input, **options, &block)
|
128
119
|
end
|
129
120
|
end
|
130
121
|
end
|
data/lib/rdf/microdata/reader.rb
CHANGED
@@ -54,7 +54,7 @@ module RDF::Microdata
|
|
54
54
|
# Redirect for RDFa Reader given `:rdfa` option
|
55
55
|
#
|
56
56
|
# @private
|
57
|
-
def self.new(input = nil, options
|
57
|
+
def self.new(input = nil, **options, &block)
|
58
58
|
klass = if options[:rdfa]
|
59
59
|
# Requires rdf-rdfa gem to be loaded
|
60
60
|
begin
|
@@ -63,19 +63,11 @@ module RDF::Microdata
|
|
63
63
|
raise ReaderError, "Use of RDFa-based reader requires rdf-rdfa gem"
|
64
64
|
end
|
65
65
|
RdfaReader
|
66
|
-
elsif options[:jsonld]
|
67
|
-
# Requires rdf-rdfa gem to be loaded
|
68
|
-
begin
|
69
|
-
require 'json/ld'
|
70
|
-
rescue LoadError
|
71
|
-
raise ReaderError, "Use of JSON-LD-based reader requires json-ld gem"
|
72
|
-
end
|
73
|
-
JsonLdReader
|
74
66
|
else
|
75
67
|
self
|
76
68
|
end
|
77
69
|
reader = klass.allocate
|
78
|
-
reader.send(:initialize, input, options, &block)
|
70
|
+
reader.send(:initialize, input, **options, &block)
|
79
71
|
reader
|
80
72
|
end
|
81
73
|
|
@@ -102,7 +94,7 @@ module RDF::Microdata
|
|
102
94
|
# @yieldparam [RDF::Reader] reader
|
103
95
|
# @yieldreturn [void] ignored
|
104
96
|
# @raise [Error] Raises `RDF::ReaderError` when validating
|
105
|
-
def initialize(input = $stdin, options
|
97
|
+
def initialize(input = $stdin, **options, &block)
|
106
98
|
super do
|
107
99
|
@library = :nokogiri
|
108
100
|
|
@@ -111,7 +103,7 @@ module RDF::Microdata
|
|
111
103
|
self.extend(@implementation)
|
112
104
|
|
113
105
|
input.rewind if input.respond_to?(:rewind)
|
114
|
-
initialize_html(input, options) rescue log_fatal($!.message, exception: RDF::ReaderError)
|
106
|
+
initialize_html(input, **options) rescue log_fatal($!.message, exception: RDF::ReaderError)
|
115
107
|
|
116
108
|
log_error("Empty document") if root.nil?
|
117
109
|
log_error(doc_errors.map(&:message).uniq.join("\n")) if !doc_errors.empty?
|
@@ -178,7 +178,7 @@ module RDF::Microdata
|
|
178
178
|
#
|
179
179
|
# @param [Hash{Symbol => Object}] options
|
180
180
|
# @return [void]
|
181
|
-
def initialize_html(input, options
|
181
|
+
def initialize_html(input, **options)
|
182
182
|
require 'nokogiri' unless defined?(::Nokogiri)
|
183
183
|
@doc = case input
|
184
184
|
when ::Nokogiri::XML::Document
|
@@ -194,7 +194,7 @@ module RDF::Microdata
|
|
194
194
|
begin
|
195
195
|
require 'nokogumbo' unless defined?(::Nokogumbo)
|
196
196
|
input = input.read if input.respond_to?(:read)
|
197
|
-
::Nokogiri::HTML5(input.force_encoding(options[:encoding]))
|
197
|
+
::Nokogiri::HTML5(input.force_encoding(options[:encoding]), max_parse_errors: 1000)
|
198
198
|
rescue LoadError
|
199
199
|
::Nokogiri::HTML.parse(input, base_uri.to_s, options[:encoding])
|
200
200
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: rdf-microdata
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version:
|
4
|
+
version: 3.1.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Gregg
|
@@ -9,48 +9,36 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date:
|
12
|
+
date: 2019-12-13 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: rdf
|
16
16
|
requirement: !ruby/object:Gem::Requirement
|
17
17
|
requirements:
|
18
|
-
- - "
|
19
|
-
- !ruby/object:Gem::Version
|
20
|
-
version: 2.2.8
|
21
|
-
- - "<"
|
18
|
+
- - "~>"
|
22
19
|
- !ruby/object:Gem::Version
|
23
|
-
version: '
|
20
|
+
version: '3.1'
|
24
21
|
type: :runtime
|
25
22
|
prerelease: false
|
26
23
|
version_requirements: !ruby/object:Gem::Requirement
|
27
24
|
requirements:
|
28
|
-
- - "
|
29
|
-
- !ruby/object:Gem::Version
|
30
|
-
version: 2.2.8
|
31
|
-
- - "<"
|
25
|
+
- - "~>"
|
32
26
|
- !ruby/object:Gem::Version
|
33
|
-
version: '
|
27
|
+
version: '3.1'
|
34
28
|
- !ruby/object:Gem::Dependency
|
35
29
|
name: rdf-xsd
|
36
30
|
requirement: !ruby/object:Gem::Requirement
|
37
31
|
requirements:
|
38
|
-
- - "
|
39
|
-
- !ruby/object:Gem::Version
|
40
|
-
version: '2.2'
|
41
|
-
- - "<"
|
32
|
+
- - "~>"
|
42
33
|
- !ruby/object:Gem::Version
|
43
|
-
version: '
|
34
|
+
version: '3.1'
|
44
35
|
type: :runtime
|
45
36
|
prerelease: false
|
46
37
|
version_requirements: !ruby/object:Gem::Requirement
|
47
38
|
requirements:
|
48
|
-
- - "
|
49
|
-
- !ruby/object:Gem::Version
|
50
|
-
version: '2.2'
|
51
|
-
- - "<"
|
39
|
+
- - "~>"
|
52
40
|
- !ruby/object:Gem::Version
|
53
|
-
version: '
|
41
|
+
version: '3.1'
|
54
42
|
- !ruby/object:Gem::Dependency
|
55
43
|
name: htmlentities
|
56
44
|
requirement: !ruby/object:Gem::Requirement
|
@@ -71,14 +59,14 @@ dependencies:
|
|
71
59
|
requirements:
|
72
60
|
- - "~>"
|
73
61
|
- !ruby/object:Gem::Version
|
74
|
-
version: '1.
|
62
|
+
version: '1.10'
|
75
63
|
type: :runtime
|
76
64
|
prerelease: false
|
77
65
|
version_requirements: !ruby/object:Gem::Requirement
|
78
66
|
requirements:
|
79
67
|
- - "~>"
|
80
68
|
- !ruby/object:Gem::Version
|
81
|
-
version: '1.
|
69
|
+
version: '1.10'
|
82
70
|
- !ruby/object:Gem::Dependency
|
83
71
|
name: equivalent-xml
|
84
72
|
requirement: !ruby/object:Gem::Requirement
|
@@ -99,142 +87,112 @@ dependencies:
|
|
99
87
|
requirements:
|
100
88
|
- - "~>"
|
101
89
|
- !ruby/object:Gem::Version
|
102
|
-
version:
|
90
|
+
version: 0.9.20
|
103
91
|
type: :development
|
104
92
|
prerelease: false
|
105
93
|
version_requirements: !ruby/object:Gem::Requirement
|
106
94
|
requirements:
|
107
95
|
- - "~>"
|
108
96
|
- !ruby/object:Gem::Version
|
109
|
-
version:
|
97
|
+
version: 0.9.20
|
110
98
|
- !ruby/object:Gem::Dependency
|
111
99
|
name: rspec
|
112
100
|
requirement: !ruby/object:Gem::Requirement
|
113
101
|
requirements:
|
114
102
|
- - "~>"
|
115
103
|
- !ruby/object:Gem::Version
|
116
|
-
version: '3.
|
104
|
+
version: '3.9'
|
117
105
|
type: :development
|
118
106
|
prerelease: false
|
119
107
|
version_requirements: !ruby/object:Gem::Requirement
|
120
108
|
requirements:
|
121
109
|
- - "~>"
|
122
110
|
- !ruby/object:Gem::Version
|
123
|
-
version: '3.
|
111
|
+
version: '3.9'
|
124
112
|
- !ruby/object:Gem::Dependency
|
125
113
|
name: rspec-its
|
126
114
|
requirement: !ruby/object:Gem::Requirement
|
127
115
|
requirements:
|
128
116
|
- - "~>"
|
129
117
|
- !ruby/object:Gem::Version
|
130
|
-
version: '1.
|
118
|
+
version: '1.3'
|
131
119
|
type: :development
|
132
120
|
prerelease: false
|
133
121
|
version_requirements: !ruby/object:Gem::Requirement
|
134
122
|
requirements:
|
135
123
|
- - "~>"
|
136
124
|
- !ruby/object:Gem::Version
|
137
|
-
version: '1.
|
125
|
+
version: '1.3'
|
138
126
|
- !ruby/object:Gem::Dependency
|
139
|
-
name:
|
127
|
+
name: rdf-spec
|
140
128
|
requirement: !ruby/object:Gem::Requirement
|
141
129
|
requirements:
|
142
|
-
- - "
|
143
|
-
- !ruby/object:Gem::Version
|
144
|
-
version: '2.1'
|
145
|
-
- - "<"
|
130
|
+
- - "~>"
|
146
131
|
- !ruby/object:Gem::Version
|
147
|
-
version: '
|
132
|
+
version: '3.1'
|
148
133
|
type: :development
|
149
134
|
prerelease: false
|
150
135
|
version_requirements: !ruby/object:Gem::Requirement
|
151
136
|
requirements:
|
152
|
-
- - "
|
153
|
-
- !ruby/object:Gem::Version
|
154
|
-
version: '2.1'
|
155
|
-
- - "<"
|
137
|
+
- - "~>"
|
156
138
|
- !ruby/object:Gem::Version
|
157
|
-
version: '
|
139
|
+
version: '3.1'
|
158
140
|
- !ruby/object:Gem::Dependency
|
159
|
-
name: rdf-
|
141
|
+
name: rdf-turtle
|
160
142
|
requirement: !ruby/object:Gem::Requirement
|
161
143
|
requirements:
|
162
144
|
- - ">="
|
163
145
|
- !ruby/object:Gem::Version
|
164
|
-
version: '
|
165
|
-
- - "<"
|
166
|
-
- !ruby/object:Gem::Version
|
167
|
-
version: '4.0'
|
146
|
+
version: '3.1'
|
168
147
|
type: :development
|
169
148
|
prerelease: false
|
170
149
|
version_requirements: !ruby/object:Gem::Requirement
|
171
150
|
requirements:
|
172
151
|
- - ">="
|
173
152
|
- !ruby/object:Gem::Version
|
174
|
-
version: '
|
175
|
-
- - "<"
|
176
|
-
- !ruby/object:Gem::Version
|
177
|
-
version: '4.0'
|
153
|
+
version: '3.1'
|
178
154
|
- !ruby/object:Gem::Dependency
|
179
|
-
name: rdf-
|
155
|
+
name: rdf-isomorphic
|
180
156
|
requirement: !ruby/object:Gem::Requirement
|
181
157
|
requirements:
|
182
|
-
- - "
|
183
|
-
- !ruby/object:Gem::Version
|
184
|
-
version: '2.2'
|
185
|
-
- - "<"
|
158
|
+
- - "~>"
|
186
159
|
- !ruby/object:Gem::Version
|
187
|
-
version: '
|
160
|
+
version: '3.1'
|
188
161
|
type: :development
|
189
162
|
prerelease: false
|
190
163
|
version_requirements: !ruby/object:Gem::Requirement
|
191
164
|
requirements:
|
192
|
-
- - "
|
193
|
-
- !ruby/object:Gem::Version
|
194
|
-
version: '2.2'
|
195
|
-
- - "<"
|
165
|
+
- - "~>"
|
196
166
|
- !ruby/object:Gem::Version
|
197
|
-
version: '
|
167
|
+
version: '3.1'
|
198
168
|
- !ruby/object:Gem::Dependency
|
199
|
-
name:
|
169
|
+
name: json-ld
|
200
170
|
requirement: !ruby/object:Gem::Requirement
|
201
171
|
requirements:
|
202
|
-
- - "
|
203
|
-
- !ruby/object:Gem::Version
|
204
|
-
version: '2.2'
|
205
|
-
- - "<"
|
172
|
+
- - "~>"
|
206
173
|
- !ruby/object:Gem::Version
|
207
|
-
version: '
|
174
|
+
version: '3.1'
|
208
175
|
type: :development
|
209
176
|
prerelease: false
|
210
177
|
version_requirements: !ruby/object:Gem::Requirement
|
211
178
|
requirements:
|
212
|
-
- - "
|
213
|
-
- !ruby/object:Gem::Version
|
214
|
-
version: '2.2'
|
215
|
-
- - "<"
|
179
|
+
- - "~>"
|
216
180
|
- !ruby/object:Gem::Version
|
217
|
-
version: '
|
181
|
+
version: '3.1'
|
218
182
|
- !ruby/object:Gem::Dependency
|
219
|
-
name: rdf-
|
183
|
+
name: rdf-rdfa
|
220
184
|
requirement: !ruby/object:Gem::Requirement
|
221
185
|
requirements:
|
222
|
-
- - "
|
223
|
-
- !ruby/object:Gem::Version
|
224
|
-
version: '2.2'
|
225
|
-
- - "<"
|
186
|
+
- - "~>"
|
226
187
|
- !ruby/object:Gem::Version
|
227
|
-
version: '
|
188
|
+
version: '3.1'
|
228
189
|
type: :development
|
229
190
|
prerelease: false
|
230
191
|
version_requirements: !ruby/object:Gem::Requirement
|
231
192
|
requirements:
|
232
|
-
- - "
|
233
|
-
- !ruby/object:Gem::Version
|
234
|
-
version: '2.2'
|
235
|
-
- - "<"
|
193
|
+
- - "~>"
|
236
194
|
- !ruby/object:Gem::Version
|
237
|
-
version: '
|
195
|
+
version: '3.1'
|
238
196
|
description: Reads HTML Microdata as RDF.
|
239
197
|
email: public-rdf-ruby@w3.org
|
240
198
|
executables: []
|
@@ -252,7 +210,6 @@ files:
|
|
252
210
|
- lib/rdf/microdata.rb
|
253
211
|
- lib/rdf/microdata/expansion.rb
|
254
212
|
- lib/rdf/microdata/format.rb
|
255
|
-
- lib/rdf/microdata/jsonld_reader.rb
|
256
213
|
- lib/rdf/microdata/rdfa_reader.rb
|
257
214
|
- lib/rdf/microdata/reader.rb
|
258
215
|
- lib/rdf/microdata/reader/nokogiri.rb
|
@@ -271,15 +228,14 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
271
228
|
requirements:
|
272
229
|
- - ">="
|
273
230
|
- !ruby/object:Gem::Version
|
274
|
-
version: 2.
|
231
|
+
version: '2.4'
|
275
232
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
276
233
|
requirements:
|
277
234
|
- - ">="
|
278
235
|
- !ruby/object:Gem::Version
|
279
236
|
version: '0'
|
280
237
|
requirements: []
|
281
|
-
|
282
|
-
rubygems_version: 2.6.14
|
238
|
+
rubygems_version: 3.0.6
|
283
239
|
signing_key:
|
284
240
|
specification_version: 4
|
285
241
|
summary: Microdata reader for Ruby.
|
@@ -1,251 +0,0 @@
|
|
1
|
-
require 'json/ld'
|
2
|
-
require 'nokogumbo'
|
3
|
-
|
4
|
-
module RDF::Microdata
|
5
|
-
##
|
6
|
-
# Update DOM to turn Microdata into JSON-LD and parse using the JSON-LD Reader
|
7
|
-
class JsonLdReader < JSON::LD::Reader
|
8
|
-
# The resulting JSON-LD
|
9
|
-
# @return [Hash]
|
10
|
-
attr_reader :jsonld
|
11
|
-
|
12
|
-
def self.format(klass = nil)
|
13
|
-
if klass.nil?
|
14
|
-
RDF::Microdata::Format
|
15
|
-
else
|
16
|
-
super
|
17
|
-
end
|
18
|
-
end
|
19
|
-
|
20
|
-
##
|
21
|
-
# Initializes the JsonLdReader instance.
|
22
|
-
#
|
23
|
-
# @param [IO, File, String] input
|
24
|
-
# the input stream to read
|
25
|
-
# @param [Hash{Symbol => Object}] options
|
26
|
-
# any additional options (see `RDF::Reader#initialize`)
|
27
|
-
# @return [reader]
|
28
|
-
# @yield [reader] `self`
|
29
|
-
# @yieldparam [RDF::Reader] reader
|
30
|
-
# @yieldreturn [void] ignored
|
31
|
-
# @raise [RDF::ReaderError] if _validate_
|
32
|
-
def initialize(input = $stdin, options = {}, &block)
|
33
|
-
@options = options
|
34
|
-
log_info('', "using JSON-LD transformation reader")
|
35
|
-
|
36
|
-
input = case input
|
37
|
-
when ::Nokogiri::XML::Document, ::Nokogiri::HTML::Document then input
|
38
|
-
else
|
39
|
-
# Try to detect charset from input
|
40
|
-
options[:encoding] ||= input.charset if input.respond_to?(:charset)
|
41
|
-
|
42
|
-
# Otherwise, default is utf-8
|
43
|
-
options[:encoding] ||= 'utf-8'
|
44
|
-
options[:encoding] = options[:encoding].to_s if options[:encoding]
|
45
|
-
input = input.read if input.respond_to?(:read)
|
46
|
-
::Nokogiri::HTML5(input.force_encoding(options[:encoding]))
|
47
|
-
end
|
48
|
-
|
49
|
-
# Load registry
|
50
|
-
begin
|
51
|
-
registry_uri = options[:registry] || RDF::Microdata::DEFAULT_REGISTRY
|
52
|
-
log_debug('', "registry = #{registry_uri.inspect}")
|
53
|
-
Registry.load_registry(registry_uri)
|
54
|
-
rescue JSON::ParserError => e
|
55
|
-
log_fatal("Failed to parse registry: #{e.message}", exception: RDF::ReaderError) if (root.nil? && validate?)
|
56
|
-
end
|
57
|
-
|
58
|
-
@jsonld = {'@graph' => []}
|
59
|
-
|
60
|
-
# Start with all top-level items
|
61
|
-
input.css("[itemscope]").each do |item|
|
62
|
-
next if item['itemprop'] # Only top-level items
|
63
|
-
jsonld['@graph'] << get_object(item)
|
64
|
-
end
|
65
|
-
|
66
|
-
log_debug('', "Transformed document: #{jsonld.to_json(JSON::LD::JSON_STATE)}")
|
67
|
-
|
68
|
-
# Rely on RDFa reader
|
69
|
-
super(jsonld.to_json, options, &block)
|
70
|
-
end
|
71
|
-
|
72
|
-
private
|
73
|
-
# Return JSON-LD representation of an item
|
74
|
-
# @param [Nokogiri::XML::Element] item
|
75
|
-
# @param [Hash{Nokogiri::XML::Node => Hash}]
|
76
|
-
# @return [Hash]
|
77
|
-
def get_object(item, memory = {})
|
78
|
-
if result = memory[item]
|
79
|
-
# Result is a reference to that item; assign a blank-node identifier if necessary
|
80
|
-
result['@id'] ||= alloc_bnode
|
81
|
-
return result
|
82
|
-
end
|
83
|
-
|
84
|
-
result = {}
|
85
|
-
memory[item] = result
|
86
|
-
|
87
|
-
# If the item has a global identifier, add an entry to result called "@id" whose value is the global identifier of item.
|
88
|
-
result['@id'] = item['itemid'].to_s if item['itemid']
|
89
|
-
|
90
|
-
# If the item has any item types, add an entry to result called "@type" whose value is an array listing the item types of item, in the order they were specified on the itemtype attribute.
|
91
|
-
if item['itemtype']
|
92
|
-
# Only absolute URLs
|
93
|
-
types = item.attribute('itemtype').
|
94
|
-
remove.
|
95
|
-
to_s.
|
96
|
-
split(/\s+/).
|
97
|
-
select {|t| RDF::URI(t).absolute?}
|
98
|
-
if vocab = types.first
|
99
|
-
vocab = Registry.find(vocab) || begin
|
100
|
-
type_vocab = vocab.to_s.sub(/([\/\#])[^\/\#]*$/, '\1') unless vocab.nil?
|
101
|
-
Registry.new(type_vocab) if type_vocab
|
102
|
-
end
|
103
|
-
(result['@context'] = {})['@vocab'] = vocab.uri.to_s if vocab
|
104
|
-
result['@type'] = types unless types.empty?
|
105
|
-
end
|
106
|
-
end
|
107
|
-
|
108
|
-
# For each element element that has one or more property names and is one of the properties of the item item, in the order those elements are given by the algorithm that returns the properties of an item, run the following substeps
|
109
|
-
item_properties(item).each do |element|
|
110
|
-
value = if element['itemscope']
|
111
|
-
get_object(element, memory)
|
112
|
-
else
|
113
|
-
property_value(element)
|
114
|
-
end
|
115
|
-
element['itemprop'].to_s.split(/\s+/).each do |prop|
|
116
|
-
result[prop] ||= [] << value
|
117
|
-
end
|
118
|
-
end
|
119
|
-
|
120
|
-
result
|
121
|
-
end
|
122
|
-
|
123
|
-
##
|
124
|
-
#
|
125
|
-
# @param [Nokogiri::XML::Element] item
|
126
|
-
# @return [Array<Nokogiri::XML::Element>]
|
127
|
-
# List of property elements for an item
|
128
|
-
def item_properties(item)
|
129
|
-
results, memory, pending = [], [item], item.children.select(&:element?)
|
130
|
-
log_debug(item, "item_properties")
|
131
|
-
|
132
|
-
# If root has an itemref attribute, split the value of that itemref attribute on spaces. For each resulting token ID, if there is an element in the document whose ID is ID, then add the first such element to pending.
|
133
|
-
item['itemref'].to_s.split(/\s+/).each do |ref|
|
134
|
-
if referenced = referenced = item.at_css("##{ref}")
|
135
|
-
pending << referenced
|
136
|
-
end
|
137
|
-
end
|
138
|
-
|
139
|
-
while !pending.empty?
|
140
|
-
current = pending.shift
|
141
|
-
# Error
|
142
|
-
break if memory.include?(current)
|
143
|
-
memory << current
|
144
|
-
|
145
|
-
# If current does not have an itemscope attribute, then: add all the child elements of current to pending.
|
146
|
-
pending += current.children.select(&:element?) unless current['itemscope']
|
147
|
-
|
148
|
-
# If current has an itemprop attribute specified and has one or more property names, then add current to results.
|
149
|
-
results << current unless current['itemprop'].to_s.split(/\s+/).empty?
|
150
|
-
end
|
151
|
-
|
152
|
-
results
|
153
|
-
end
|
154
|
-
|
155
|
-
##
|
156
|
-
#
|
157
|
-
def property_value(element)
|
158
|
-
base = element.base || base_uri
|
159
|
-
log_debug(element) {"property_value(#{element.name}): base #{base.inspect}"}
|
160
|
-
value = case
|
161
|
-
when element.has_attribute?('itemscope')
|
162
|
-
{}
|
163
|
-
when element.has_attribute?('content')
|
164
|
-
if element.language
|
165
|
-
{"@value" => element['content'].to_s.strip, language: element.language}
|
166
|
-
else
|
167
|
-
element['content'].to_s.strip
|
168
|
-
end
|
169
|
-
when %w(data meter).include?(element.name) && element.attribute('value')
|
170
|
-
# XXX parse as number?
|
171
|
-
{"@value" => element['value'].to_s.strip}
|
172
|
-
when %w(audio embed iframe img source track video).include?(element.name)
|
173
|
-
{"@id" => uri(element.attribute('src'), base).to_s}
|
174
|
-
when %w(a area link).include?(element.name)
|
175
|
-
{"@id" => uri(element.attribute('href'), base).to_s}
|
176
|
-
when %w(object).include?(element.name)
|
177
|
-
{"@id" => uri(element.attribute('data'), base).to_s}
|
178
|
-
when %w(time).include?(element.name)
|
179
|
-
# use datatype?
|
180
|
-
(element.attribute('datetime') || element.text).to_s.strip
|
181
|
-
else
|
182
|
-
if element.language
|
183
|
-
{"@value" => element.inner_text.to_s.strip, language: element.language}
|
184
|
-
else
|
185
|
-
element.inner_text.to_s.strip
|
186
|
-
end
|
187
|
-
end
|
188
|
-
log_debug(element) {" #{value.inspect}"}
|
189
|
-
value
|
190
|
-
end
|
191
|
-
|
192
|
-
# Allocate a new blank node identifier
|
193
|
-
# @return [String]
|
194
|
-
def alloc_bnode
|
195
|
-
@bnode_base ||= "_:a"
|
196
|
-
res = @bnode_base
|
197
|
-
@bnode_base = res.succ
|
198
|
-
res
|
199
|
-
end
|
200
|
-
|
201
|
-
# Fixme, what about xml:base relative to element?
|
202
|
-
def uri(value, base = nil)
|
203
|
-
value = if base
|
204
|
-
base = uri(base) unless base.is_a?(RDF::URI)
|
205
|
-
base.join(value.to_s)
|
206
|
-
else
|
207
|
-
RDF::URI(value.to_s)
|
208
|
-
end
|
209
|
-
value.validate! if validate?
|
210
|
-
value.canonicalize! if canonicalize?
|
211
|
-
value = RDF::URI.intern(value) if intern?
|
212
|
-
value
|
213
|
-
end
|
214
|
-
end
|
215
|
-
end
|
216
|
-
|
217
|
-
# Monkey Patch Nokogiri
|
218
|
-
module Nokogiri::XML
|
219
|
-
class Element
|
220
|
-
|
221
|
-
##
|
222
|
-
# Get any xml:base in effect for this element
|
223
|
-
def base
|
224
|
-
if @base.nil?
|
225
|
-
@base = attributes['xml:base'] ||
|
226
|
-
(parent && parent.element? && parent.base) ||
|
227
|
-
false
|
228
|
-
end
|
229
|
-
|
230
|
-
@base == false ? nil : @base
|
231
|
-
end
|
232
|
-
|
233
|
-
|
234
|
-
##
|
235
|
-
# Get any xml:lang or lang in effect for this element
|
236
|
-
def language
|
237
|
-
if @language.nil?
|
238
|
-
language = case
|
239
|
-
when self["xml:lang"]
|
240
|
-
self["xml:lang"].to_s
|
241
|
-
when self["lang"]
|
242
|
-
self["lang"].to_s
|
243
|
-
else
|
244
|
-
parent && parent.element? && parent.language
|
245
|
-
end
|
246
|
-
end
|
247
|
-
@language == false ? nil : @language
|
248
|
-
end
|
249
|
-
|
250
|
-
end
|
251
|
-
end
|