rdf-normalize 0.1.0 → 0.3.0.beta1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +4 -4
- data/VERSION +1 -1
- data/lib/rdf/normalize.rb +0 -1
- data/lib/rdf/normalize/urdna2015.rb +22 -23
- data/lib/rdf/normalize/urgna2012.rb +3 -3
- data/lib/rdf/normalize/writer.rb +11 -12
- metadata +37 -20
- data/lib/rdf/normalize/utils.rb +0 -33
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 1135be30a9a3c1f15e14fa57ff0dce5488a53734
|
4
|
+
data.tar.gz: fd85ba1edde8b8d03a297d65d4a69dae7da3f308
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: c6de1463ff4f57d54e937e24ba59134a8c8f4cf10f228e9835d032118a3b6297a40d485075c35e679db3eae176340df0492af88efca3613d0bb00d0f413bb9f1
|
7
|
+
data.tar.gz: 979b2463ccc9859cd752548a9f9532f1c16206205c5c54554408099f2167427a97dc3f4b9b6525fe3ee096dacecb916ec78d6f4b32e42189a426cedb1547fc5d
|
data/README.md
CHANGED
@@ -15,12 +15,12 @@ to serialize normalized statements.
|
|
15
15
|
Algorithms implemented:
|
16
16
|
|
17
17
|
* [URGNA2012](http://json-ld.github.io/normalization/spec/index.html#dfn-urgna2012)
|
18
|
-
* [
|
18
|
+
* [URDNA2015](http://json-ld.github.io/normalization/spec/index.html#dfn-urdna2015)
|
19
19
|
|
20
20
|
Install with `gem install rdf-normalize`
|
21
21
|
|
22
22
|
* 100% free and unencumbered [public domain](http://unlicense.org/) software.
|
23
|
-
* Compatible with Ruby >=
|
23
|
+
* Compatible with Ruby >= 2.0.
|
24
24
|
|
25
25
|
## Usage
|
26
26
|
|
@@ -38,8 +38,8 @@ Full documentation available on [Rubydoc.info][Normalize doc]
|
|
38
38
|
|
39
39
|
## Dependencies
|
40
40
|
|
41
|
-
* [Ruby](http://ruby-lang.org/) (>=
|
42
|
-
* [RDF.rb](http://rubygems.org/gems/rdf) (~>
|
41
|
+
* [Ruby](http://ruby-lang.org/) (>= 2.0)
|
42
|
+
* [RDF.rb](http://rubygems.org/gems/rdf) (~> 2.0)
|
43
43
|
|
44
44
|
## Installation
|
45
45
|
|
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.
|
1
|
+
0.3.0.beta1
|
data/lib/rdf/normalize.rb
CHANGED
@@ -28,7 +28,6 @@ module RDF
|
|
28
28
|
# @author [Gregg Kellogg](http://greggkellogg.net/)
|
29
29
|
module Normalize
|
30
30
|
require 'rdf/normalize/format'
|
31
|
-
require 'rdf/normalize/utils'
|
32
31
|
autoload :Base, 'rdf/normalize/base'
|
33
32
|
autoload :Carroll2001,'rdf/normalize/carroll2001'
|
34
33
|
autoload :URGNA2012, 'rdf/normalize/urgna2012'
|
@@ -1,8 +1,8 @@
|
|
1
1
|
module RDF::Normalize
|
2
2
|
class URDNA2015
|
3
3
|
include RDF::Enumerable
|
4
|
+
include RDF::Util::Logger
|
4
5
|
include Base
|
5
|
-
include Utils
|
6
6
|
|
7
7
|
##
|
8
8
|
# Create an enumerable with grounded nodes
|
@@ -35,8 +35,8 @@ module RDF::Normalize
|
|
35
35
|
|
36
36
|
# Calculate hashes for first degree nodes
|
37
37
|
non_normalized_identifiers.each do |node|
|
38
|
-
hash =
|
39
|
-
|
38
|
+
hash = log_depth {ns.hash_first_degree_quads(node)}
|
39
|
+
log_debug("1deg") {"hash: #{hash}"}
|
40
40
|
ns.add_bnode_hash(node, hash)
|
41
41
|
end
|
42
42
|
|
@@ -46,7 +46,7 @@ module RDF::Normalize
|
|
46
46
|
next if identifier_list.length > 1
|
47
47
|
node = identifier_list.first
|
48
48
|
id = ns.canonical_issuer.issue_identifier(node)
|
49
|
-
|
49
|
+
log_debug("single node") {"node: #{node.to_ntriples}, hash: #{hash}, id: #{id}"}
|
50
50
|
non_normalized_identifiers -= identifier_list
|
51
51
|
ns.hash_to_bnodes.delete(hash)
|
52
52
|
simple = true
|
@@ -57,7 +57,7 @@ module RDF::Normalize
|
|
57
57
|
ns.hash_to_bnodes.keys.sort.each do |hash|
|
58
58
|
identifier_list = ns.hash_to_bnodes[hash]
|
59
59
|
|
60
|
-
|
60
|
+
log_debug("multiple nodes") {"node: #{identifier_list.map(&:to_ntriples).join(",")}, hash: #{hash}"}
|
61
61
|
hash_path_list = []
|
62
62
|
|
63
63
|
# Create a hash_path_list for all bnodes using a temporary identifier used to create canonical replacements
|
@@ -65,15 +65,15 @@ module RDF::Normalize
|
|
65
65
|
next if ns.canonical_issuer.issued.include?(identifier)
|
66
66
|
temporary_issuer = IdentifierIssuer.new("_:b")
|
67
67
|
temporary_issuer.issue_identifier(identifier)
|
68
|
-
hash_path_list <<
|
68
|
+
hash_path_list << log_depth {ns.hash_n_degree_quads(identifier, temporary_issuer)}
|
69
69
|
end
|
70
|
-
|
70
|
+
log_debug("->") {"hash_path_list: #{hash_path_list.map(&:first).inspect}"}
|
71
71
|
|
72
72
|
# Create canonical replacements for nodes
|
73
73
|
hash_path_list.sort_by(&:first).map(&:last).each do |issuer|
|
74
74
|
issuer.issued.each do |node|
|
75
75
|
id = ns.canonical_issuer.issue_identifier(node)
|
76
|
-
|
76
|
+
log_debug("-->") {"node: #{node.to_ntriples}, id: #{id}"}
|
77
77
|
end
|
78
78
|
end
|
79
79
|
end
|
@@ -94,7 +94,7 @@ module RDF::Normalize
|
|
94
94
|
private
|
95
95
|
|
96
96
|
class NormalizationState
|
97
|
-
include
|
97
|
+
include RDF::Util::Logger
|
98
98
|
|
99
99
|
attr_accessor :bnode_to_statements
|
100
100
|
attr_accessor :hash_to_bnodes
|
@@ -116,7 +116,7 @@ module RDF::Normalize
|
|
116
116
|
end
|
117
117
|
|
118
118
|
# @param [RDF::Node] node
|
119
|
-
# @return [String] the
|
119
|
+
# @return [String] the SHA256 hexdigest hash of statements using this node, with replacements
|
120
120
|
def hash_first_degree_quads(node)
|
121
121
|
quads = bnode_to_statements[node].
|
122
122
|
map do |statement|
|
@@ -130,7 +130,7 @@ module RDF::Normalize
|
|
130
130
|
RDF::NQuads::Writer.serialize(RDF::Statement.from(quad))
|
131
131
|
end
|
132
132
|
|
133
|
-
|
133
|
+
log_debug("1deg") {"node: #{node}, quads: #{quads}"}
|
134
134
|
hexdigest(quads.sort.join)
|
135
135
|
end
|
136
136
|
|
@@ -138,7 +138,7 @@ module RDF::Normalize
|
|
138
138
|
# @param [RDF::Statement] statement
|
139
139
|
# @param [IdentifierIssuer] issuer
|
140
140
|
# @param [String] position one of :s, :o, or :g
|
141
|
-
# @return [String] the
|
141
|
+
# @return [String] the SHA256 hexdigest hash
|
142
142
|
def hash_related_node(related, statement, issuer, position)
|
143
143
|
identifier = canonical_issuer.identifier(related) ||
|
144
144
|
issuer.identifier(related) ||
|
@@ -146,7 +146,7 @@ module RDF::Normalize
|
|
146
146
|
input = position.to_s
|
147
147
|
input << statement.predicate.to_ntriples unless position == :g
|
148
148
|
input << identifier
|
149
|
-
|
149
|
+
log_debug("hrel") {"input: #{input.inspect}, hash: #{hexdigest(input)}"}
|
150
150
|
hexdigest(input)
|
151
151
|
end
|
152
152
|
|
@@ -154,7 +154,7 @@ module RDF::Normalize
|
|
154
154
|
# @param [IdentifierIssuer] issuer
|
155
155
|
# @return [Array<String,IdentifierIssuer>] the Hash and issuer
|
156
156
|
def hash_n_degree_quads(identifier, issuer)
|
157
|
-
|
157
|
+
log_debug("ndeg") {"identifier: #{identifier.to_ntriples}"}
|
158
158
|
|
159
159
|
# hash to related blank nodes map
|
160
160
|
map = {}
|
@@ -165,8 +165,8 @@ module RDF::Normalize
|
|
165
165
|
|
166
166
|
data_to_hash = ""
|
167
167
|
|
168
|
-
|
169
|
-
|
168
|
+
log_debug("ndeg") {"map: #{map.map {|h,l| "#{h}: #{l.map(&:to_ntriples)}"}.join('; ')}"}
|
169
|
+
log_depth do
|
170
170
|
map.keys.sort.each do |hash|
|
171
171
|
list = map[hash]
|
172
172
|
# Iterate over related nodes
|
@@ -174,7 +174,7 @@ module RDF::Normalize
|
|
174
174
|
data_to_hash += hash
|
175
175
|
|
176
176
|
list.permutation do |permutation|
|
177
|
-
|
177
|
+
log_debug("ndeg") {"perm: #{permutation.map(&:to_ntriples).join(",")}"}
|
178
178
|
issuer_copy, path, recursion_list = issuer.dup, "", []
|
179
179
|
|
180
180
|
permutation.each do |related|
|
@@ -188,10 +188,10 @@ module RDF::Normalize
|
|
188
188
|
# Skip to the next permutation if chosen path isn't empty and the path is greater than the chosen path
|
189
189
|
break if !chosen_path.empty? && path.length >= chosen_path.length
|
190
190
|
end
|
191
|
-
|
191
|
+
log_debug("ndeg") {"hash: #{hash}, path: #{path}, recursion: #{recursion_list.map(&:to_ntriples)}"}
|
192
192
|
|
193
193
|
recursion_list.each do |related|
|
194
|
-
result =
|
194
|
+
result = log_depth {hash_n_degree_quads(related, issuer_copy)}
|
195
195
|
path << issuer_copy.issue_identifier(related)
|
196
196
|
path << "<#{result.first}>"
|
197
197
|
issuer_copy = result.last
|
@@ -208,15 +208,14 @@ module RDF::Normalize
|
|
208
208
|
end
|
209
209
|
end
|
210
210
|
|
211
|
-
|
211
|
+
log_debug("ndeg") {"datatohash: #{data_to_hash.inspect}, hash: #{hexdigest(data_to_hash)}"}
|
212
212
|
return [hexdigest(data_to_hash), issuer]
|
213
213
|
end
|
214
214
|
|
215
215
|
protected
|
216
216
|
|
217
|
-
# FIXME: should be SHA-256.
|
218
217
|
def hexdigest(val)
|
219
|
-
Digest::
|
218
|
+
Digest::SHA256.hexdigest(val)
|
220
219
|
end
|
221
220
|
|
222
221
|
# Group adjacent bnodes by hash
|
@@ -224,7 +223,7 @@ module RDF::Normalize
|
|
224
223
|
statement.to_hash(:s, :p, :o, :g).each do |pos, term|
|
225
224
|
next if !term.is_a?(RDF::Node) || term == identifier
|
226
225
|
|
227
|
-
hash =
|
226
|
+
hash = log_depth {hash_related_node(term, statement, issuer, pos)}
|
228
227
|
map[hash] ||= []
|
229
228
|
map[hash] << term unless map[hash].include?(term)
|
230
229
|
end
|
@@ -26,18 +26,18 @@ module RDF::Normalize
|
|
26
26
|
input = position.to_s
|
27
27
|
input << statement.predicate.to_s
|
28
28
|
input << identifier
|
29
|
-
|
29
|
+
log_debug("hrel") {"input: #{input.inspect}, hash: #{hexdigest(input)}"}
|
30
30
|
hexdigest(input)
|
31
31
|
end
|
32
32
|
|
33
33
|
# In URGNA2012, the position parameter passed to the Hash Related Blank Node algorithm was instead modeled as a direction parameter, where it could have the value p, for property, when the related blank node was a `subject` and the value r, for reverse or reference, when the related blank node was an `object`. Since URGNA2012 only normalized graphs, not datasets, there was no use of the `graph` position.
|
34
34
|
def hash_related_statement(identifier, statement, issuer, map)
|
35
35
|
if statement.subject.node? && statement.subject != identifier
|
36
|
-
hash =
|
36
|
+
hash = log_depth {hash_related_node(statement.subject, statement, issuer, :p)}
|
37
37
|
map[hash] ||= []
|
38
38
|
map[hash] << statement.subject unless map[hash].include?(statement.subject)
|
39
39
|
elsif statement.object.node? && statement.object != identifier
|
40
|
-
hash =
|
40
|
+
hash = log_depth {hash_related_node(statement.object, statement, issuer, :r)}
|
41
41
|
map[hash] ||= []
|
42
42
|
map[hash] << statement.object unless map[hash].include?(statement.object)
|
43
43
|
end
|
data/lib/rdf/normalize/writer.rb
CHANGED
@@ -25,7 +25,6 @@ module RDF::Normalize
|
|
25
25
|
# @yieldparam [RDF::Writer] writer
|
26
26
|
def initialize(output = $stdout, options = {}, &block)
|
27
27
|
super do
|
28
|
-
@options[:depth] ||= 0
|
29
28
|
@repo = RDF::Repository.new
|
30
29
|
if block_given?
|
31
30
|
case block.arity
|
@@ -36,10 +35,17 @@ module RDF::Normalize
|
|
36
35
|
end
|
37
36
|
end
|
38
37
|
|
38
|
+
|
39
39
|
##
|
40
|
-
#
|
41
|
-
|
42
|
-
|
40
|
+
# Adds statements to the repository to be serialized in epilogue.
|
41
|
+
#
|
42
|
+
# @param [RDF::Resource] subject
|
43
|
+
# @param [RDF::URI] predicate
|
44
|
+
# @param [RDF::Value] object
|
45
|
+
# @param [RDF::Resource] graph_name
|
46
|
+
# @return [void]
|
47
|
+
def write_quad(subject, predicate, object, graph_name)
|
48
|
+
@repo.insert(RDF::Statement(subject, predicate, object, graph_name: graph_name))
|
43
49
|
end
|
44
50
|
|
45
51
|
##
|
@@ -55,18 +61,11 @@ module RDF::Normalize
|
|
55
61
|
each do |line|
|
56
62
|
puts line
|
57
63
|
end
|
64
|
+
super
|
58
65
|
end
|
59
66
|
|
60
67
|
protected
|
61
68
|
|
62
|
-
##
|
63
|
-
# Adds a statement to be serialized
|
64
|
-
# @param [RDF::Statement] statement
|
65
|
-
# @return [void]
|
66
|
-
def insert_statement(statement)
|
67
|
-
@repo.insert(statement)
|
68
|
-
end
|
69
|
-
|
70
69
|
##
|
71
70
|
# Insert an Enumerable
|
72
71
|
#
|
metadata
CHANGED
@@ -1,43 +1,55 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: rdf-normalize
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.3.0.beta1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Gregg Kellogg
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2016-02-02 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rdf
|
15
15
|
requirement: !ruby/object:Gem::Requirement
|
16
16
|
requirements:
|
17
|
-
- - "
|
17
|
+
- - ">="
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: 2.0.0.beta
|
20
|
+
- - "<"
|
18
21
|
- !ruby/object:Gem::Version
|
19
|
-
version: '
|
22
|
+
version: '3'
|
20
23
|
type: :runtime
|
21
24
|
prerelease: false
|
22
25
|
version_requirements: !ruby/object:Gem::Requirement
|
23
26
|
requirements:
|
24
|
-
- - "
|
27
|
+
- - ">="
|
25
28
|
- !ruby/object:Gem::Version
|
26
|
-
version:
|
29
|
+
version: 2.0.0.beta
|
30
|
+
- - "<"
|
31
|
+
- !ruby/object:Gem::Version
|
32
|
+
version: '3'
|
27
33
|
- !ruby/object:Gem::Dependency
|
28
34
|
name: rdf-spec
|
29
35
|
requirement: !ruby/object:Gem::Requirement
|
30
36
|
requirements:
|
31
|
-
- - "
|
37
|
+
- - ">="
|
32
38
|
- !ruby/object:Gem::Version
|
33
|
-
version:
|
39
|
+
version: 2.0.0.beta
|
40
|
+
- - "<"
|
41
|
+
- !ruby/object:Gem::Version
|
42
|
+
version: '3'
|
34
43
|
type: :development
|
35
44
|
prerelease: false
|
36
45
|
version_requirements: !ruby/object:Gem::Requirement
|
37
46
|
requirements:
|
38
|
-
- - "
|
47
|
+
- - ">="
|
48
|
+
- !ruby/object:Gem::Version
|
49
|
+
version: 2.0.0.beta
|
50
|
+
- - "<"
|
39
51
|
- !ruby/object:Gem::Version
|
40
|
-
version: '
|
52
|
+
version: '3'
|
41
53
|
- !ruby/object:Gem::Dependency
|
42
54
|
name: open-uri-cached
|
43
55
|
requirement: !ruby/object:Gem::Requirement
|
@@ -90,16 +102,22 @@ dependencies:
|
|
90
102
|
name: json-ld
|
91
103
|
requirement: !ruby/object:Gem::Requirement
|
92
104
|
requirements:
|
93
|
-
- - "
|
105
|
+
- - ">="
|
106
|
+
- !ruby/object:Gem::Version
|
107
|
+
version: 2.0.0.beta
|
108
|
+
- - "<"
|
94
109
|
- !ruby/object:Gem::Version
|
95
|
-
version: '
|
110
|
+
version: '3'
|
96
111
|
type: :development
|
97
112
|
prerelease: false
|
98
113
|
version_requirements: !ruby/object:Gem::Requirement
|
99
114
|
requirements:
|
100
|
-
- - "
|
115
|
+
- - ">="
|
101
116
|
- !ruby/object:Gem::Version
|
102
|
-
version:
|
117
|
+
version: 2.0.0.beta
|
118
|
+
- - "<"
|
119
|
+
- !ruby/object:Gem::Version
|
120
|
+
version: '3'
|
103
121
|
- !ruby/object:Gem::Dependency
|
104
122
|
name: yard
|
105
123
|
requirement: !ruby/object:Gem::Requirement
|
@@ -130,11 +148,10 @@ files:
|
|
130
148
|
- lib/rdf/normalize/format.rb
|
131
149
|
- lib/rdf/normalize/urdna2015.rb
|
132
150
|
- lib/rdf/normalize/urgna2012.rb
|
133
|
-
- lib/rdf/normalize/utils.rb
|
134
151
|
- lib/rdf/normalize/writer.rb
|
135
152
|
homepage: http://github.com/gkellogg/rdf-normalize
|
136
153
|
licenses:
|
137
|
-
-
|
154
|
+
- Unlicense
|
138
155
|
metadata: {}
|
139
156
|
post_install_message:
|
140
157
|
rdoc_options: []
|
@@ -144,15 +161,15 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
144
161
|
requirements:
|
145
162
|
- - ">="
|
146
163
|
- !ruby/object:Gem::Version
|
147
|
-
version:
|
164
|
+
version: 2.0.0
|
148
165
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
149
166
|
requirements:
|
150
|
-
- - "
|
167
|
+
- - ">"
|
151
168
|
- !ruby/object:Gem::Version
|
152
|
-
version:
|
169
|
+
version: 1.3.1
|
153
170
|
requirements: []
|
154
171
|
rubyforge_project: rdf-normalize
|
155
|
-
rubygems_version: 2.
|
172
|
+
rubygems_version: 2.5.1
|
156
173
|
signing_key:
|
157
174
|
specification_version: 4
|
158
175
|
summary: RDF Graph normalizer for Ruby.
|
data/lib/rdf/normalize/utils.rb
DELETED
@@ -1,33 +0,0 @@
|
|
1
|
-
module RDF::Normalize
|
2
|
-
module Utils
|
3
|
-
# Add debug event to debug array, if specified
|
4
|
-
#
|
5
|
-
# param [String] message
|
6
|
-
# yieldreturn [String] appended to message, to allow for lazy-evaulation of message
|
7
|
-
def debug(*args)
|
8
|
-
options = args.last.is_a?(Hash) ? args.pop : {}
|
9
|
-
return unless options[:debug] || @options[:debug]
|
10
|
-
depth = options[:depth] || @options[:depth]
|
11
|
-
d_str = depth > 100 ? ' ' * 100 + '+' : ' ' * depth
|
12
|
-
list = args
|
13
|
-
list << yield if block_given?
|
14
|
-
message = d_str + (list.empty? ? "" : list.join(": "))
|
15
|
-
options[:debug] << message if options[:debug].is_a?(Array)
|
16
|
-
@options[:debug] << message if @options[:debug].is_a?(Array)
|
17
|
-
$stderr.puts(message) if @options[:debug] == TrueClass
|
18
|
-
end
|
19
|
-
module_function :debug
|
20
|
-
|
21
|
-
# Increase depth around a method invocation
|
22
|
-
# @yield
|
23
|
-
# Yields with no arguments
|
24
|
-
# @yieldreturn [Object] returns the result of yielding
|
25
|
-
# @return [Object]
|
26
|
-
def depth
|
27
|
-
@options[:depth] += 1
|
28
|
-
ret = yield
|
29
|
-
@options[:depth] -= 1
|
30
|
-
ret
|
31
|
-
end
|
32
|
-
end
|
33
|
-
end
|