rdf-normalize 0.1.0 → 0.3.0.beta1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +4 -4
- data/VERSION +1 -1
- data/lib/rdf/normalize.rb +0 -1
- data/lib/rdf/normalize/urdna2015.rb +22 -23
- data/lib/rdf/normalize/urgna2012.rb +3 -3
- data/lib/rdf/normalize/writer.rb +11 -12
- metadata +37 -20
- data/lib/rdf/normalize/utils.rb +0 -33
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 1135be30a9a3c1f15e14fa57ff0dce5488a53734
|
4
|
+
data.tar.gz: fd85ba1edde8b8d03a297d65d4a69dae7da3f308
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: c6de1463ff4f57d54e937e24ba59134a8c8f4cf10f228e9835d032118a3b6297a40d485075c35e679db3eae176340df0492af88efca3613d0bb00d0f413bb9f1
|
7
|
+
data.tar.gz: 979b2463ccc9859cd752548a9f9532f1c16206205c5c54554408099f2167427a97dc3f4b9b6525fe3ee096dacecb916ec78d6f4b32e42189a426cedb1547fc5d
|
data/README.md
CHANGED
@@ -15,12 +15,12 @@ to serialize normalized statements.
|
|
15
15
|
Algorithms implemented:
|
16
16
|
|
17
17
|
* [URGNA2012](http://json-ld.github.io/normalization/spec/index.html#dfn-urgna2012)
|
18
|
-
* [
|
18
|
+
* [URDNA2015](http://json-ld.github.io/normalization/spec/index.html#dfn-urdna2015)
|
19
19
|
|
20
20
|
Install with `gem install rdf-normalize`
|
21
21
|
|
22
22
|
* 100% free and unencumbered [public domain](http://unlicense.org/) software.
|
23
|
-
* Compatible with Ruby >=
|
23
|
+
* Compatible with Ruby >= 2.0.
|
24
24
|
|
25
25
|
## Usage
|
26
26
|
|
@@ -38,8 +38,8 @@ Full documentation available on [Rubydoc.info][Normalize doc]
|
|
38
38
|
|
39
39
|
## Dependencies
|
40
40
|
|
41
|
-
* [Ruby](http://ruby-lang.org/) (>=
|
42
|
-
* [RDF.rb](http://rubygems.org/gems/rdf) (~>
|
41
|
+
* [Ruby](http://ruby-lang.org/) (>= 2.0)
|
42
|
+
* [RDF.rb](http://rubygems.org/gems/rdf) (~> 2.0)
|
43
43
|
|
44
44
|
## Installation
|
45
45
|
|
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.
|
1
|
+
0.3.0.beta1
|
data/lib/rdf/normalize.rb
CHANGED
@@ -28,7 +28,6 @@ module RDF
|
|
28
28
|
# @author [Gregg Kellogg](http://greggkellogg.net/)
|
29
29
|
module Normalize
|
30
30
|
require 'rdf/normalize/format'
|
31
|
-
require 'rdf/normalize/utils'
|
32
31
|
autoload :Base, 'rdf/normalize/base'
|
33
32
|
autoload :Carroll2001,'rdf/normalize/carroll2001'
|
34
33
|
autoload :URGNA2012, 'rdf/normalize/urgna2012'
|
@@ -1,8 +1,8 @@
|
|
1
1
|
module RDF::Normalize
|
2
2
|
class URDNA2015
|
3
3
|
include RDF::Enumerable
|
4
|
+
include RDF::Util::Logger
|
4
5
|
include Base
|
5
|
-
include Utils
|
6
6
|
|
7
7
|
##
|
8
8
|
# Create an enumerable with grounded nodes
|
@@ -35,8 +35,8 @@ module RDF::Normalize
|
|
35
35
|
|
36
36
|
# Calculate hashes for first degree nodes
|
37
37
|
non_normalized_identifiers.each do |node|
|
38
|
-
hash =
|
39
|
-
|
38
|
+
hash = log_depth {ns.hash_first_degree_quads(node)}
|
39
|
+
log_debug("1deg") {"hash: #{hash}"}
|
40
40
|
ns.add_bnode_hash(node, hash)
|
41
41
|
end
|
42
42
|
|
@@ -46,7 +46,7 @@ module RDF::Normalize
|
|
46
46
|
next if identifier_list.length > 1
|
47
47
|
node = identifier_list.first
|
48
48
|
id = ns.canonical_issuer.issue_identifier(node)
|
49
|
-
|
49
|
+
log_debug("single node") {"node: #{node.to_ntriples}, hash: #{hash}, id: #{id}"}
|
50
50
|
non_normalized_identifiers -= identifier_list
|
51
51
|
ns.hash_to_bnodes.delete(hash)
|
52
52
|
simple = true
|
@@ -57,7 +57,7 @@ module RDF::Normalize
|
|
57
57
|
ns.hash_to_bnodes.keys.sort.each do |hash|
|
58
58
|
identifier_list = ns.hash_to_bnodes[hash]
|
59
59
|
|
60
|
-
|
60
|
+
log_debug("multiple nodes") {"node: #{identifier_list.map(&:to_ntriples).join(",")}, hash: #{hash}"}
|
61
61
|
hash_path_list = []
|
62
62
|
|
63
63
|
# Create a hash_path_list for all bnodes using a temporary identifier used to create canonical replacements
|
@@ -65,15 +65,15 @@ module RDF::Normalize
|
|
65
65
|
next if ns.canonical_issuer.issued.include?(identifier)
|
66
66
|
temporary_issuer = IdentifierIssuer.new("_:b")
|
67
67
|
temporary_issuer.issue_identifier(identifier)
|
68
|
-
hash_path_list <<
|
68
|
+
hash_path_list << log_depth {ns.hash_n_degree_quads(identifier, temporary_issuer)}
|
69
69
|
end
|
70
|
-
|
70
|
+
log_debug("->") {"hash_path_list: #{hash_path_list.map(&:first).inspect}"}
|
71
71
|
|
72
72
|
# Create canonical replacements for nodes
|
73
73
|
hash_path_list.sort_by(&:first).map(&:last).each do |issuer|
|
74
74
|
issuer.issued.each do |node|
|
75
75
|
id = ns.canonical_issuer.issue_identifier(node)
|
76
|
-
|
76
|
+
log_debug("-->") {"node: #{node.to_ntriples}, id: #{id}"}
|
77
77
|
end
|
78
78
|
end
|
79
79
|
end
|
@@ -94,7 +94,7 @@ module RDF::Normalize
|
|
94
94
|
private
|
95
95
|
|
96
96
|
class NormalizationState
|
97
|
-
include
|
97
|
+
include RDF::Util::Logger
|
98
98
|
|
99
99
|
attr_accessor :bnode_to_statements
|
100
100
|
attr_accessor :hash_to_bnodes
|
@@ -116,7 +116,7 @@ module RDF::Normalize
|
|
116
116
|
end
|
117
117
|
|
118
118
|
# @param [RDF::Node] node
|
119
|
-
# @return [String] the
|
119
|
+
# @return [String] the SHA256 hexdigest hash of statements using this node, with replacements
|
120
120
|
def hash_first_degree_quads(node)
|
121
121
|
quads = bnode_to_statements[node].
|
122
122
|
map do |statement|
|
@@ -130,7 +130,7 @@ module RDF::Normalize
|
|
130
130
|
RDF::NQuads::Writer.serialize(RDF::Statement.from(quad))
|
131
131
|
end
|
132
132
|
|
133
|
-
|
133
|
+
log_debug("1deg") {"node: #{node}, quads: #{quads}"}
|
134
134
|
hexdigest(quads.sort.join)
|
135
135
|
end
|
136
136
|
|
@@ -138,7 +138,7 @@ module RDF::Normalize
|
|
138
138
|
# @param [RDF::Statement] statement
|
139
139
|
# @param [IdentifierIssuer] issuer
|
140
140
|
# @param [String] position one of :s, :o, or :g
|
141
|
-
# @return [String] the
|
141
|
+
# @return [String] the SHA256 hexdigest hash
|
142
142
|
def hash_related_node(related, statement, issuer, position)
|
143
143
|
identifier = canonical_issuer.identifier(related) ||
|
144
144
|
issuer.identifier(related) ||
|
@@ -146,7 +146,7 @@ module RDF::Normalize
|
|
146
146
|
input = position.to_s
|
147
147
|
input << statement.predicate.to_ntriples unless position == :g
|
148
148
|
input << identifier
|
149
|
-
|
149
|
+
log_debug("hrel") {"input: #{input.inspect}, hash: #{hexdigest(input)}"}
|
150
150
|
hexdigest(input)
|
151
151
|
end
|
152
152
|
|
@@ -154,7 +154,7 @@ module RDF::Normalize
|
|
154
154
|
# @param [IdentifierIssuer] issuer
|
155
155
|
# @return [Array<String,IdentifierIssuer>] the Hash and issuer
|
156
156
|
def hash_n_degree_quads(identifier, issuer)
|
157
|
-
|
157
|
+
log_debug("ndeg") {"identifier: #{identifier.to_ntriples}"}
|
158
158
|
|
159
159
|
# hash to related blank nodes map
|
160
160
|
map = {}
|
@@ -165,8 +165,8 @@ module RDF::Normalize
|
|
165
165
|
|
166
166
|
data_to_hash = ""
|
167
167
|
|
168
|
-
|
169
|
-
|
168
|
+
log_debug("ndeg") {"map: #{map.map {|h,l| "#{h}: #{l.map(&:to_ntriples)}"}.join('; ')}"}
|
169
|
+
log_depth do
|
170
170
|
map.keys.sort.each do |hash|
|
171
171
|
list = map[hash]
|
172
172
|
# Iterate over related nodes
|
@@ -174,7 +174,7 @@ module RDF::Normalize
|
|
174
174
|
data_to_hash += hash
|
175
175
|
|
176
176
|
list.permutation do |permutation|
|
177
|
-
|
177
|
+
log_debug("ndeg") {"perm: #{permutation.map(&:to_ntriples).join(",")}"}
|
178
178
|
issuer_copy, path, recursion_list = issuer.dup, "", []
|
179
179
|
|
180
180
|
permutation.each do |related|
|
@@ -188,10 +188,10 @@ module RDF::Normalize
|
|
188
188
|
# Skip to the next permutation if chosen path isn't empty and the path is greater than the chosen path
|
189
189
|
break if !chosen_path.empty? && path.length >= chosen_path.length
|
190
190
|
end
|
191
|
-
|
191
|
+
log_debug("ndeg") {"hash: #{hash}, path: #{path}, recursion: #{recursion_list.map(&:to_ntriples)}"}
|
192
192
|
|
193
193
|
recursion_list.each do |related|
|
194
|
-
result =
|
194
|
+
result = log_depth {hash_n_degree_quads(related, issuer_copy)}
|
195
195
|
path << issuer_copy.issue_identifier(related)
|
196
196
|
path << "<#{result.first}>"
|
197
197
|
issuer_copy = result.last
|
@@ -208,15 +208,14 @@ module RDF::Normalize
|
|
208
208
|
end
|
209
209
|
end
|
210
210
|
|
211
|
-
|
211
|
+
log_debug("ndeg") {"datatohash: #{data_to_hash.inspect}, hash: #{hexdigest(data_to_hash)}"}
|
212
212
|
return [hexdigest(data_to_hash), issuer]
|
213
213
|
end
|
214
214
|
|
215
215
|
protected
|
216
216
|
|
217
|
-
# FIXME: should be SHA-256.
|
218
217
|
def hexdigest(val)
|
219
|
-
Digest::
|
218
|
+
Digest::SHA256.hexdigest(val)
|
220
219
|
end
|
221
220
|
|
222
221
|
# Group adjacent bnodes by hash
|
@@ -224,7 +223,7 @@ module RDF::Normalize
|
|
224
223
|
statement.to_hash(:s, :p, :o, :g).each do |pos, term|
|
225
224
|
next if !term.is_a?(RDF::Node) || term == identifier
|
226
225
|
|
227
|
-
hash =
|
226
|
+
hash = log_depth {hash_related_node(term, statement, issuer, pos)}
|
228
227
|
map[hash] ||= []
|
229
228
|
map[hash] << term unless map[hash].include?(term)
|
230
229
|
end
|
@@ -26,18 +26,18 @@ module RDF::Normalize
|
|
26
26
|
input = position.to_s
|
27
27
|
input << statement.predicate.to_s
|
28
28
|
input << identifier
|
29
|
-
|
29
|
+
log_debug("hrel") {"input: #{input.inspect}, hash: #{hexdigest(input)}"}
|
30
30
|
hexdigest(input)
|
31
31
|
end
|
32
32
|
|
33
33
|
# In URGNA2012, the position parameter passed to the Hash Related Blank Node algorithm was instead modeled as a direction parameter, where it could have the value p, for property, when the related blank node was a `subject` and the value r, for reverse or reference, when the related blank node was an `object`. Since URGNA2012 only normalized graphs, not datasets, there was no use of the `graph` position.
|
34
34
|
def hash_related_statement(identifier, statement, issuer, map)
|
35
35
|
if statement.subject.node? && statement.subject != identifier
|
36
|
-
hash =
|
36
|
+
hash = log_depth {hash_related_node(statement.subject, statement, issuer, :p)}
|
37
37
|
map[hash] ||= []
|
38
38
|
map[hash] << statement.subject unless map[hash].include?(statement.subject)
|
39
39
|
elsif statement.object.node? && statement.object != identifier
|
40
|
-
hash =
|
40
|
+
hash = log_depth {hash_related_node(statement.object, statement, issuer, :r)}
|
41
41
|
map[hash] ||= []
|
42
42
|
map[hash] << statement.object unless map[hash].include?(statement.object)
|
43
43
|
end
|
data/lib/rdf/normalize/writer.rb
CHANGED
@@ -25,7 +25,6 @@ module RDF::Normalize
|
|
25
25
|
# @yieldparam [RDF::Writer] writer
|
26
26
|
def initialize(output = $stdout, options = {}, &block)
|
27
27
|
super do
|
28
|
-
@options[:depth] ||= 0
|
29
28
|
@repo = RDF::Repository.new
|
30
29
|
if block_given?
|
31
30
|
case block.arity
|
@@ -36,10 +35,17 @@ module RDF::Normalize
|
|
36
35
|
end
|
37
36
|
end
|
38
37
|
|
38
|
+
|
39
39
|
##
|
40
|
-
#
|
41
|
-
|
42
|
-
|
40
|
+
# Adds statements to the repository to be serialized in epilogue.
|
41
|
+
#
|
42
|
+
# @param [RDF::Resource] subject
|
43
|
+
# @param [RDF::URI] predicate
|
44
|
+
# @param [RDF::Value] object
|
45
|
+
# @param [RDF::Resource] graph_name
|
46
|
+
# @return [void]
|
47
|
+
def write_quad(subject, predicate, object, graph_name)
|
48
|
+
@repo.insert(RDF::Statement(subject, predicate, object, graph_name: graph_name))
|
43
49
|
end
|
44
50
|
|
45
51
|
##
|
@@ -55,18 +61,11 @@ module RDF::Normalize
|
|
55
61
|
each do |line|
|
56
62
|
puts line
|
57
63
|
end
|
64
|
+
super
|
58
65
|
end
|
59
66
|
|
60
67
|
protected
|
61
68
|
|
62
|
-
##
|
63
|
-
# Adds a statement to be serialized
|
64
|
-
# @param [RDF::Statement] statement
|
65
|
-
# @return [void]
|
66
|
-
def insert_statement(statement)
|
67
|
-
@repo.insert(statement)
|
68
|
-
end
|
69
|
-
|
70
69
|
##
|
71
70
|
# Insert an Enumerable
|
72
71
|
#
|
metadata
CHANGED
@@ -1,43 +1,55 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: rdf-normalize
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.3.0.beta1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Gregg Kellogg
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2016-02-02 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rdf
|
15
15
|
requirement: !ruby/object:Gem::Requirement
|
16
16
|
requirements:
|
17
|
-
- - "
|
17
|
+
- - ">="
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: 2.0.0.beta
|
20
|
+
- - "<"
|
18
21
|
- !ruby/object:Gem::Version
|
19
|
-
version: '
|
22
|
+
version: '3'
|
20
23
|
type: :runtime
|
21
24
|
prerelease: false
|
22
25
|
version_requirements: !ruby/object:Gem::Requirement
|
23
26
|
requirements:
|
24
|
-
- - "
|
27
|
+
- - ">="
|
25
28
|
- !ruby/object:Gem::Version
|
26
|
-
version:
|
29
|
+
version: 2.0.0.beta
|
30
|
+
- - "<"
|
31
|
+
- !ruby/object:Gem::Version
|
32
|
+
version: '3'
|
27
33
|
- !ruby/object:Gem::Dependency
|
28
34
|
name: rdf-spec
|
29
35
|
requirement: !ruby/object:Gem::Requirement
|
30
36
|
requirements:
|
31
|
-
- - "
|
37
|
+
- - ">="
|
32
38
|
- !ruby/object:Gem::Version
|
33
|
-
version:
|
39
|
+
version: 2.0.0.beta
|
40
|
+
- - "<"
|
41
|
+
- !ruby/object:Gem::Version
|
42
|
+
version: '3'
|
34
43
|
type: :development
|
35
44
|
prerelease: false
|
36
45
|
version_requirements: !ruby/object:Gem::Requirement
|
37
46
|
requirements:
|
38
|
-
- - "
|
47
|
+
- - ">="
|
48
|
+
- !ruby/object:Gem::Version
|
49
|
+
version: 2.0.0.beta
|
50
|
+
- - "<"
|
39
51
|
- !ruby/object:Gem::Version
|
40
|
-
version: '
|
52
|
+
version: '3'
|
41
53
|
- !ruby/object:Gem::Dependency
|
42
54
|
name: open-uri-cached
|
43
55
|
requirement: !ruby/object:Gem::Requirement
|
@@ -90,16 +102,22 @@ dependencies:
|
|
90
102
|
name: json-ld
|
91
103
|
requirement: !ruby/object:Gem::Requirement
|
92
104
|
requirements:
|
93
|
-
- - "
|
105
|
+
- - ">="
|
106
|
+
- !ruby/object:Gem::Version
|
107
|
+
version: 2.0.0.beta
|
108
|
+
- - "<"
|
94
109
|
- !ruby/object:Gem::Version
|
95
|
-
version: '
|
110
|
+
version: '3'
|
96
111
|
type: :development
|
97
112
|
prerelease: false
|
98
113
|
version_requirements: !ruby/object:Gem::Requirement
|
99
114
|
requirements:
|
100
|
-
- - "
|
115
|
+
- - ">="
|
101
116
|
- !ruby/object:Gem::Version
|
102
|
-
version:
|
117
|
+
version: 2.0.0.beta
|
118
|
+
- - "<"
|
119
|
+
- !ruby/object:Gem::Version
|
120
|
+
version: '3'
|
103
121
|
- !ruby/object:Gem::Dependency
|
104
122
|
name: yard
|
105
123
|
requirement: !ruby/object:Gem::Requirement
|
@@ -130,11 +148,10 @@ files:
|
|
130
148
|
- lib/rdf/normalize/format.rb
|
131
149
|
- lib/rdf/normalize/urdna2015.rb
|
132
150
|
- lib/rdf/normalize/urgna2012.rb
|
133
|
-
- lib/rdf/normalize/utils.rb
|
134
151
|
- lib/rdf/normalize/writer.rb
|
135
152
|
homepage: http://github.com/gkellogg/rdf-normalize
|
136
153
|
licenses:
|
137
|
-
-
|
154
|
+
- Unlicense
|
138
155
|
metadata: {}
|
139
156
|
post_install_message:
|
140
157
|
rdoc_options: []
|
@@ -144,15 +161,15 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
144
161
|
requirements:
|
145
162
|
- - ">="
|
146
163
|
- !ruby/object:Gem::Version
|
147
|
-
version:
|
164
|
+
version: 2.0.0
|
148
165
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
149
166
|
requirements:
|
150
|
-
- - "
|
167
|
+
- - ">"
|
151
168
|
- !ruby/object:Gem::Version
|
152
|
-
version:
|
169
|
+
version: 1.3.1
|
153
170
|
requirements: []
|
154
171
|
rubyforge_project: rdf-normalize
|
155
|
-
rubygems_version: 2.
|
172
|
+
rubygems_version: 2.5.1
|
156
173
|
signing_key:
|
157
174
|
specification_version: 4
|
158
175
|
summary: RDF Graph normalizer for Ruby.
|
data/lib/rdf/normalize/utils.rb
DELETED
@@ -1,33 +0,0 @@
|
|
1
|
-
module RDF::Normalize
|
2
|
-
module Utils
|
3
|
-
# Add debug event to debug array, if specified
|
4
|
-
#
|
5
|
-
# param [String] message
|
6
|
-
# yieldreturn [String] appended to message, to allow for lazy-evaulation of message
|
7
|
-
def debug(*args)
|
8
|
-
options = args.last.is_a?(Hash) ? args.pop : {}
|
9
|
-
return unless options[:debug] || @options[:debug]
|
10
|
-
depth = options[:depth] || @options[:depth]
|
11
|
-
d_str = depth > 100 ? ' ' * 100 + '+' : ' ' * depth
|
12
|
-
list = args
|
13
|
-
list << yield if block_given?
|
14
|
-
message = d_str + (list.empty? ? "" : list.join(": "))
|
15
|
-
options[:debug] << message if options[:debug].is_a?(Array)
|
16
|
-
@options[:debug] << message if @options[:debug].is_a?(Array)
|
17
|
-
$stderr.puts(message) if @options[:debug] == TrueClass
|
18
|
-
end
|
19
|
-
module_function :debug
|
20
|
-
|
21
|
-
# Increase depth around a method invocation
|
22
|
-
# @yield
|
23
|
-
# Yields with no arguments
|
24
|
-
# @yieldreturn [Object] returns the result of yielding
|
25
|
-
# @return [Object]
|
26
|
-
def depth
|
27
|
-
@options[:depth] += 1
|
28
|
-
ret = yield
|
29
|
-
@options[:depth] -= 1
|
30
|
-
ret
|
31
|
-
end
|
32
|
-
end
|
33
|
-
end
|