rdf-isomorphic 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/AUTHORS +1 -0
- data/README +70 -0
- data/README.md +70 -0
- data/UNLICENSE +24 -0
- data/VERSION +1 -0
- data/lib/rdf/isomorphic.rb +220 -0
- metadata +89 -0
data/AUTHORS
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
* Ben Lavender <blavender@gmail.com> (Lead developer)
|
data/README
ADDED
@@ -0,0 +1,70 @@
|
|
1
|
+
# RDF Isomorphism
|
2
|
+
|
3
|
+
Provides RDF Isomorphism functionality for RDF.rb RDF::Enumerables. That
|
4
|
+
includes RDF::Repository, RDF::Graph, query results, and more.
|
5
|
+
|
6
|
+
For more information about RDF.rb, see <http://rdf.rubyforge.org>
|
7
|
+
|
8
|
+
## Synopsis:
|
9
|
+
|
10
|
+
require 'rdf/isomorphic'
|
11
|
+
require 'rdf/ntriples'
|
12
|
+
a = RDF::Repository.load './tests/isomorphic/test1/test1-1.nt'
|
13
|
+
a.first
|
14
|
+
=> < RDF::Statement:0xd344c4(<http://example.org/a> <http://example.org/prop> <_:abc> .) >
|
15
|
+
|
16
|
+
b = RDF::Repository.load './tests/isomorphic/test1/test1-2.nt'
|
17
|
+
b.first
|
18
|
+
=> < RDF::Statement:0xd3801a(<http://example.org/a> <http://example.org/prop> <_:testing> .) >
|
19
|
+
|
20
|
+
a.isomorphic_with? b
|
21
|
+
=> true
|
22
|
+
a.bijection_to b
|
23
|
+
=> { #<RDF::Node:0xd345a0(_:abc)>=>#<RDF::Node:0xd38574(_:testing)> }
|
24
|
+
|
25
|
+
## Algorithm
|
26
|
+
|
27
|
+
More discussion on the algorithm used will be in a forthcoming blog post, but
|
28
|
+
it is very similar to the one described by Jeremy Carroll in
|
29
|
+
<http://www.hpl.hp.com/techreports/2001/HPL-2001-293.pdf>.
|
30
|
+
|
31
|
+
Generally speaking, the Carroll algorithm is a very good fit for RDF graphs. It
|
32
|
+
is a specialization of the naive factorial-time test for graph isomorphism,
|
33
|
+
wherein non-anonymous RDF data lets us eliminate vast quantities of options well
|
34
|
+
before we try them. Pathological cases, such as graphs which only contain
|
35
|
+
anonymous resources, will experience poor performance.
|
36
|
+
|
37
|
+
### Equality
|
38
|
+
|
39
|
+
Although it was considered to provide `==` to mean isomorphic, RDF isomorphism is a
|
40
|
+
factorial-complexity problem and it seemed better to perhaps not overwrite such
|
41
|
+
a commonly used method for that. But it's really useful for specs in RDF
|
42
|
+
libraries. Try this:
|
43
|
+
|
44
|
+
require 'rdf/isomorphic'
|
45
|
+
module RDF
|
46
|
+
module Isomorphic
|
47
|
+
alias_method :==, :isomorphic_with
|
48
|
+
end
|
49
|
+
end
|
50
|
+
|
51
|
+
describe 'something' do
|
52
|
+
context 'does' do
|
53
|
+
it 'should be equal' do
|
54
|
+
repository_a.should == repository_b
|
55
|
+
end
|
56
|
+
end
|
57
|
+
end
|
58
|
+
|
59
|
+
### Information
|
60
|
+
* Author: Ben Lavender <blavender@gmail.com>
|
61
|
+
* Source: <http://github.com/bhuga/RDF-Isomorphic>
|
62
|
+
* Issues: <http://github.com/bhuga/RDF-Isomorphic/issues>
|
63
|
+
|
64
|
+
### See also
|
65
|
+
* RDF.rb: <http://rdf.rubyforge.org>
|
66
|
+
* RDF.rb source: <http://github.com/bendiken/rdf>
|
67
|
+
|
68
|
+
### "License"
|
69
|
+
|
70
|
+
rdf-isomorphic is free and unencumbered software in the public domain. For more information, see the accompanying UNLICENSE file or <http://unlicense.org>
|
data/README.md
ADDED
@@ -0,0 +1,70 @@
|
|
1
|
+
# RDF Isomorphism
|
2
|
+
|
3
|
+
Provides RDF Isomorphism functionality for RDF.rb RDF::Enumerables. That
|
4
|
+
includes RDF::Repository, RDF::Graph, query results, and more.
|
5
|
+
|
6
|
+
For more information about RDF.rb, see <http://rdf.rubyforge.org>
|
7
|
+
|
8
|
+
## Synopsis:
|
9
|
+
|
10
|
+
require 'rdf/isomorphic'
|
11
|
+
require 'rdf/ntriples'
|
12
|
+
a = RDF::Repository.load './tests/isomorphic/test1/test1-1.nt'
|
13
|
+
a.first
|
14
|
+
=> < RDF::Statement:0xd344c4(<http://example.org/a> <http://example.org/prop> <_:abc> .) >
|
15
|
+
|
16
|
+
b = RDF::Repository.load './tests/isomorphic/test1/test1-2.nt'
|
17
|
+
b.first
|
18
|
+
=> < RDF::Statement:0xd3801a(<http://example.org/a> <http://example.org/prop> <_:testing> .) >
|
19
|
+
|
20
|
+
a.isomorphic_with? b
|
21
|
+
=> true
|
22
|
+
a.bijection_to b
|
23
|
+
=> { #<RDF::Node:0xd345a0(_:abc)>=>#<RDF::Node:0xd38574(_:testing)> }
|
24
|
+
|
25
|
+
## Algorithm
|
26
|
+
|
27
|
+
More discussion on the algorithm used will be in a forthcoming blog post, but
|
28
|
+
it is very similar to the one described by Jeremy Carroll in
|
29
|
+
<http://www.hpl.hp.com/techreports/2001/HPL-2001-293.pdf>.
|
30
|
+
|
31
|
+
Generally speaking, the Carroll algorithm is a very good fit for RDF graphs. It
|
32
|
+
is a specialization of the naive factorial-time test for graph isomorphism,
|
33
|
+
wherein non-anonymous RDF data lets us eliminate vast quantities of options well
|
34
|
+
before we try them. Pathological cases, such as graphs which only contain
|
35
|
+
anonymous resources, will experience poor performance.
|
36
|
+
|
37
|
+
### Equality
|
38
|
+
|
39
|
+
Although it was considered to provide `==` to mean isomorphic, RDF isomorphism is a
|
40
|
+
factorial-complexity problem and it seemed better to perhaps not overwrite such
|
41
|
+
a commonly used method for that. But it's really useful for specs in RDF
|
42
|
+
libraries. Try this:
|
43
|
+
|
44
|
+
require 'rdf/isomorphic'
|
45
|
+
module RDF
|
46
|
+
module Isomorphic
|
47
|
+
alias_method :==, :isomorphic_with
|
48
|
+
end
|
49
|
+
end
|
50
|
+
|
51
|
+
describe 'something' do
|
52
|
+
context 'does' do
|
53
|
+
it 'should be equal' do
|
54
|
+
repository_a.should == repository_b
|
55
|
+
end
|
56
|
+
end
|
57
|
+
end
|
58
|
+
|
59
|
+
### Information
|
60
|
+
* Author: Ben Lavender <blavender@gmail.com>
|
61
|
+
* Source: <http://github.com/bhuga/RDF-Isomorphic>
|
62
|
+
* Issues: <http://github.com/bhuga/RDF-Isomorphic/issues>
|
63
|
+
|
64
|
+
### See also
|
65
|
+
* RDF.rb: <http://rdf.rubyforge.org>
|
66
|
+
* RDF.rb source: <http://github.com/bendiken/rdf>
|
67
|
+
|
68
|
+
### "License"
|
69
|
+
|
70
|
+
rdf-isomorphic is free and unencumbered software in the public domain. For more information, see the accompanying UNLICENSE file or <http://unlicense.org>
|
data/UNLICENSE
ADDED
@@ -0,0 +1,24 @@
|
|
1
|
+
This is free and unencumbered software released into the public domain.
|
2
|
+
|
3
|
+
Anyone is free to copy, modify, publish, use, compile, sell, or
|
4
|
+
distribute this software, either in source code form or as a compiled
|
5
|
+
binary, for any purpose, commercial or non-commercial, and by any
|
6
|
+
means.
|
7
|
+
|
8
|
+
In jurisdictions that recognize copyright laws, the author or authors
|
9
|
+
of this software dedicate any and all copyright interest in the
|
10
|
+
software to the public domain. We make this dedication for the benefit
|
11
|
+
of the public at large and to the detriment of our heirs and
|
12
|
+
successors. We intend this dedication to be an overt act of
|
13
|
+
relinquishment in perpetuity of all present and future rights to this
|
14
|
+
software under copyright law.
|
15
|
+
|
16
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
17
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
18
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
19
|
+
IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
20
|
+
OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
21
|
+
ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
22
|
+
OTHER DEALINGS IN THE SOFTWARE.
|
23
|
+
|
24
|
+
For more information, please refer to <http://unlicense.org/>
|
data/VERSION
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
0.0.1
|
@@ -0,0 +1,220 @@
|
|
1
|
+
require 'digest/sha1'
|
2
|
+
require 'rdf'
|
3
|
+
|
4
|
+
|
5
|
+
module RDF
  ##
  # Isomorphism for rdf.rb Enumerables
  #
  # RDF::Isomorphic provides the methods `isomorphic_with` (aliased as
  # `isomorphic?` and `isomorphic_with?`) and `bijection_to` for
  # RDF::Enumerable.
  #
  # @see http://rdf.rubyforge.org
  # @see http://www.hpl.hp.com/techreports/2001/HPL-2001-293.pdf
  module Isomorphic

    # Returns `true` if this RDF::Enumerable is isomorphic with another,
    # i.e. if {#bijection_to} can find a blank node bijection.
    #
    # @param other [RDF::Enumerable]
    # @return [Boolean]
    # @example
    #     repository_a.isomorphic_with repository_b #=> true
    def isomorphic_with(other)
      !bijection_to(other).nil?
    end

    alias_method :isomorphic?, :isomorphic_with
    alias_method :isomorphic_with?, :isomorphic_with

    # Returns a hash of RDF::Nodes => RDF::Nodes representing an isomorphic
    # bijection of this RDF::Enumerable's blank nodes, or nil if a bijection
    # cannot be found.
    #
    # Statements without blank nodes must be present in BOTH enumerables;
    # checking only one direction would let a graph match any superset of
    # its ground statements.
    #
    # @example
    #     repository_a.bijection_to repository_b
    # @param other [RDF::Enumerable]
    # @return [Hash, nil]
    def bijection_to(other)
      return nil unless ground_statements_subset?(self, other)
      return nil unless ground_statements_subset?(other, self)

      blank_stmts       = find_all { |statement| statement.has_blank_nodes? }
      other_blank_stmts = other.find_all { |statement| statement.has_blank_nodes? }
      nodes             = blank_nodes_in(blank_stmts)
      other_nodes       = blank_nodes_in(other_blank_stmts)
      build_bijection_to blank_stmts, nodes, other_blank_stmts, other_nodes
    end

    private

    # Returns true if every statement of `source` that contains no blank
    # nodes is also a statement of `target`.
    #
    # @private
    # @param source [RDF::Enumerable]
    # @param target [RDF::Enumerable]
    # @return [Boolean]
    def ground_statements_subset?(source, target)
      source.each_statement do |statement|
        next if statement.has_blank_nodes?
        return false unless target.has_statement?(statement)
      end
      true
    end

    # The main recursive bijection algorithm.
    #
    # This algorithm is very similar to the one explained by Jeremy Carroll in
    # http://www.hpl.hp.com/techreports/2001/HPL-2001-293.pdf. Page 12 has the
    # relevant pseudocode.
    #
    # @private
    # @param anon_stmts [Array] statements of this enumerable containing blank nodes
    # @param nodes [Array] blank nodes of this enumerable
    # @param other_anon_stmts [Array] statements of the other enumerable containing blank nodes
    # @param other_nodes [Array] blank nodes of the other enumerable
    # @param hashes [Hash] node => signature hash for nodes already grounded;
    #   it is filled in further by this call
    # @return [Hash, nil] node => node bijection, or nil if there is none
    def build_bijection_to(anon_stmts, nodes, other_anon_stmts, other_nodes, hashes = {})
      # hashes:           signature hashes of grounded nodes only.
      # potential_hashes: as hashes, but also holds the (partial) signatures
      #                   of nodes that could not be grounded yet.
      #
      # A node is grounded when none of its neighbours in its statements is
      # an ungrounded blank node. Grounding one node may ground others, so
      # each graph is rehashed until a pass grounds nothing new.
      potential_hashes = {}
      [[anon_stmts, nodes], [other_anon_stmts, other_nodes]].each do |statements, graph_nodes|
        hash_needed = true
        while hash_needed
          hash_needed = false
          graph_nodes.each do |node|
            next if hashes.member?(node)
            is_grounded, hash = node_hash_for(node, statements, hashes)
            if is_grounded
              hash_needed = true
              hashes[node] = hash
            end
            potential_hashes[node] = hash
          end
        end
      end

      # Pair each of our nodes with a not-yet-used node of the other graph
      # carrying the same hash. Used targets are deleted from the working
      # copy so the mapping stays one to one.
      bijection = {}
      bijection_hashes = hashes.dup
      nodes.each do |node|
        node_hash = bijection_hashes[node]
        match = bijection_hashes.find do |candidate, candidate_hash|
          (candidate_hash == node_hash) &&
            # eql? instead of include? since RDF.rb coincidentally-same-named identifiers will be ==
            other_nodes.any? { |item| candidate.eql?(item) }
        end
        next unless match
        target = match.first
        bijection_hashes.delete target
        bijection[node] = target
      end

      # Every node on both sides is covered: we have a bijection and are done.
      if (bijection.keys.sort == nodes.sort) && (bijection.values.sort == other_nodes.sort)
        return bijection
      end

      # Some nodes cannot be definitively grounded. Tentatively pair two
      # ungrounded nodes with identical partial signatures, give them a
      # shared hash and recurse; the first pairing that leads to a complete
      # bijection wins.
      nodes.each do |node|
        # We don't replace grounded nodes' hashes
        next if hashes.member?(node)
        other_nodes.each do |other_node|
          next if hashes.member?(other_node)
          # The ungrounded signature must match for this pair to have a chance.
          next unless potential_hashes[node] == potential_hashes[other_node]
          hash = Digest::SHA1.hexdigest(node.to_s)
          test_hashes = { node => hash, other_node => hash }
          result = build_bijection_to(anon_stmts, nodes, other_anon_stmts, other_nodes, hashes.merge(test_hashes))
          return result if result
        end
      end
      nil
    end

    # Blank nodes appearing as subject or object in the given statements.
    #
    # @private
    # @param blank_stmt_list [Array] statements to scan
    # @return [Array<RDF::Node>] the blank nodes, without duplicates
    def blank_nodes_in(blank_stmt_list)
      nodes = []
      blank_stmt_list.each do |statement|
        nodes << statement.object if statement.object.anonymous?
        nodes << statement.subject if statement.subject.anonymous?
      end
      nodes.uniq
    end

    # Generate a hash for a node based on the signature of the statements it
    # appears in. Signatures consist of grounded elements in statements
    # associated with a node, that is, anything but an ungrounded anonymous
    # node. Creating the hash is simply hashing a sorted list of each
    # statement's signature.
    #
    # Nodes other than the given node are considered grounded if they are a
    # member in the given hash. The given node itself never counts against
    # groundedness; otherwise no node could ever be grounded by signature.
    #
    # Returns a tuple consisting of grounded being true or false and the String
    # for the hash
    # @private
    # @param node [RDF::Node] the blank node to hash
    # @param statements [Array] statements of the graph the node belongs to
    # @param hashes [Hash] node => hash for nodes grounded so far
    # @return [Array(Boolean, String)]
    def node_hash_for(node, statements, hashes)
      statement_signatures = []
      all_grounded = true
      statements.each do |statement|
        next unless (statement.object == node) || (statement.subject == node)
        statement_signatures << hash_string_for(statement, hashes)
        [statement.subject, statement.object].each do |resource|
          next if resource == node
          all_grounded = false unless grounded(resource, hashes)
        end
      end
      [all_grounded, Digest::SHA1.hexdigest(statement_signatures.sort.to_s)]
    end

    # Provide a string signature for the given statement.
    #
    # Each of subject, predicate and object is written as "<length>:<text>"
    # so the three positions cannot run into each other; plain concatenation
    # would give `_:x <a> <b>` and `<a> <b> _:y` the same signature.
    #
    # @private
    # @param statement [RDF::Statement]
    # @param hashes [Hash] node => hash for nodes grounded so far
    # @return [String]
    def hash_string_for(statement, hashes)
      parts = [
        string_for_node(statement.subject, hashes),
        statement.predicate.to_s,
        string_for_node(statement.object, hashes)
      ]
      parts.map { |part| "#{part.length}:#{part}" }.join
    end

    # Returns true if a given node is grounded, i.e. it is not anonymous or
    # it already has an entry in the given hashes.
    #
    # @private
    # @param node [RDF::Value]
    # @param hashes [Hash] node => hash for nodes grounded so far
    # @return [Boolean]
    def grounded(node, hashes)
      !node.anonymous? || hashes.member?(node)
    end

    # Provides a string for the given node for use in a string signature:
    # the node's own string form when it is not anonymous, its hash when it
    # is a grounded blank node, and the empty string otherwise.
    #
    # @private
    # @param node [RDF::Value]
    # @param hashes [Hash] node => hash for nodes grounded so far
    # @return [String]
    def string_for_node(node, hashes)
      return node.to_s unless node.anonymous?
      hashes.member?(node) ? hashes[node] : ""
    end
  end

  # Mix the isomorphism methods into RDF::Enumerable.
  module Enumerable
    include RDF::Isomorphic
  end
end
|
220
|
+
|
metadata
ADDED
@@ -0,0 +1,89 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: rdf-isomorphic
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.1
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Ben Lavender
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
|
12
|
+
date: 2010-02-01 00:00:00 +01:00
|
13
|
+
default_executable:
|
14
|
+
dependencies:
|
15
|
+
- !ruby/object:Gem::Dependency
|
16
|
+
name: rdf
|
17
|
+
type: :runtime
|
18
|
+
version_requirement:
|
19
|
+
version_requirements: !ruby/object:Gem::Requirement
|
20
|
+
requirements:
|
21
|
+
- - ">="
|
22
|
+
- !ruby/object:Gem::Version
|
23
|
+
version: 0.0.9
|
24
|
+
version:
|
25
|
+
- !ruby/object:Gem::Dependency
|
26
|
+
name: rspec
|
27
|
+
type: :development
|
28
|
+
version_requirement:
|
29
|
+
version_requirements: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - ">="
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: 1.2.9
|
34
|
+
version:
|
35
|
+
- !ruby/object:Gem::Dependency
|
36
|
+
name: yard
|
37
|
+
type: :development
|
38
|
+
version_requirement:
|
39
|
+
version_requirements: !ruby/object:Gem::Requirement
|
40
|
+
requirements:
|
41
|
+
- - ">="
|
42
|
+
- !ruby/object:Gem::Version
|
43
|
+
version: 0.5.2
|
44
|
+
version:
|
45
|
+
description: " rdf-isomorphic provides bijections mapping blank nodes from one\n RDF::Enumerable to another, and thus equivalence (isomorphism) testing.\n"
|
46
|
+
email: blavender@gmail.com
|
47
|
+
executables: []
|
48
|
+
|
49
|
+
extensions: []
|
50
|
+
|
51
|
+
extra_rdoc_files: []
|
52
|
+
|
53
|
+
files:
|
54
|
+
- AUTHORS
|
55
|
+
- README
|
56
|
+
- UNLICENSE
|
57
|
+
- VERSION
|
58
|
+
- README.md
|
59
|
+
- lib/rdf/isomorphic.rb
|
60
|
+
has_rdoc: false
|
61
|
+
homepage: http://rdf.rubyforge.org/
|
62
|
+
licenses:
|
63
|
+
- Public Domain
|
64
|
+
post_install_message:
|
65
|
+
rdoc_options: []
|
66
|
+
|
67
|
+
require_paths:
|
68
|
+
- lib
|
69
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
70
|
+
requirements:
|
71
|
+
- - ">="
|
72
|
+
- !ruby/object:Gem::Version
|
73
|
+
version: 1.8.2
|
74
|
+
version:
|
75
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
76
|
+
requirements:
|
77
|
+
- - ">="
|
78
|
+
- !ruby/object:Gem::Version
|
79
|
+
version: "0"
|
80
|
+
version:
|
81
|
+
requirements: []
|
82
|
+
|
83
|
+
rubyforge_project: rdf-isomorphic
|
84
|
+
rubygems_version: 1.3.5
|
85
|
+
signing_key:
|
86
|
+
specification_version: 3
|
87
|
+
summary: Graph bijections and isomorphic equivalence for rdf.rb
|
88
|
+
test_files: []
|
89
|
+
|