rdf-normalize 0.6.1 → 0.7.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +13 -4
- data/VERSION +1 -1
- data/lib/rdf/normalize/format.rb +12 -1
- data/lib/rdf/normalize/rdfc10.rb +11 -3
- data/lib/rdf/normalize.rb +27 -2
- metadata +14 -14
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 150cdddab40f368e1d1e68ebc65efe3990032729e8d9a591ef8436d61e81d057
|
4
|
+
data.tar.gz: 4510812f3e52b0159ec2025421d116d5c98d37840f3f87e25affb8392a5aa8b0
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: ff3fd846a595da0df711bd83673498d259260a19f42f59a9d5f10e55a70670de7c0fba4b301d63ea725e2484179fd6b91b462ea943df9afe8c3d937660d06327
|
7
|
+
data.tar.gz: c66aa1ec1740e1d0c894d5ed9104b61819d055fc83aa2c6674fa1c34d2c9e62c395a78bb6aeec2b46c68c063fc6e79d55250e8d590a0f97c0624888488c4d60c
|
data/README.md
CHANGED
@@ -22,7 +22,7 @@ Algorithms implemented:
|
|
22
22
|
Install with `gem install rdf-normalize`
|
23
23
|
|
24
24
|
* 100% free and unencumbered [public domain](https://unlicense.org/) software.
|
25
|
-
* Compatible with Ruby >=
|
25
|
+
* Compatible with Ruby >= 3.0.
|
26
26
|
|
27
27
|
## Usage
|
28
28
|
|
@@ -37,7 +37,14 @@ Full documentation available on [GitHub][Normalize doc]
|
|
37
37
|
require 'rdf/normalize'
|
38
38
|
require 'rdf/turtle'
|
39
39
|
g = RDF::Graph.load("etc/doap.ttl")
|
40
|
-
puts g.dump(:normalize)
|
40
|
+
puts g.dump(:normalize) # Can also use :canonicalize
|
41
|
+
|
42
|
+
### Normalizing an abstract Graph/Dataset
|
43
|
+
require 'rdf/normalize'
|
44
|
+
require 'rdf/turtle'
|
45
|
+
g = RDF::Graph.load("etc/doap.ttl")
|
46
|
+
g_canon = g.canonicalize # graph with URIs, literals, and blank nodes canonicalized.
|
47
|
+
puts g_canon.dump(:nquads) # Normalized, but not sorted
|
41
48
|
|
42
49
|
### Principal Classes
|
43
50
|
* {RDF::Normalize}
|
@@ -46,11 +53,13 @@ Full documentation available on [GitHub][Normalize doc]
|
|
46
53
|
* {RDF::Normalize::Writer}
|
47
54
|
* {RDF::Normalize::URGNA2012}
|
48
55
|
* {RDF::Normalize::RDFC10}
|
56
|
+
* {RDF::Canonicalize} – extends {RDF::Normalize}
|
57
|
+
* {RDF::Canonicalize::Format}
|
49
58
|
|
50
59
|
## Dependencies
|
51
60
|
|
52
|
-
* [Ruby](https://ruby-lang.org/) (>=
|
53
|
-
* [RDF.rb](https://rubygems.org/gems/rdf) (~> 3.
|
61
|
+
* [Ruby](https://ruby-lang.org/) (>= 3.0)
|
62
|
+
* [RDF.rb](https://rubygems.org/gems/rdf) (~> 3.3)
|
54
63
|
|
55
64
|
## Installation
|
56
65
|
|
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.
|
1
|
+
0.7.0
|
data/lib/rdf/normalize/format.rb
CHANGED
@@ -2,7 +2,18 @@ require 'rdf/nquads'
|
|
2
2
|
|
3
3
|
module RDF::Normalize
|
4
4
|
class Format < RDF::Format
|
5
|
-
content_type 'application/
|
5
|
+
content_type 'application/canonical+n-quads', alias: 'application/x-canonical+n-quads'
|
6
|
+
content_encoding 'utf-8'
|
7
|
+
|
8
|
+
# It reads like normal N-Quads
|
9
|
+
reader { RDF::NQuads::Reader}
|
10
|
+
writer { RDF::Normalize::Writer }
|
11
|
+
end
|
12
|
+
end
|
13
|
+
|
14
|
+
module RDF::Canonicalize
|
15
|
+
class Format < RDF::Format
|
16
|
+
content_type 'application/canonical+n-quads', alias: 'application/x-canonical+n-quads'
|
6
17
|
content_encoding 'utf-8'
|
7
18
|
|
8
19
|
# It reads like normal N-Quads
|
data/lib/rdf/normalize/rdfc10.rb
CHANGED
@@ -17,10 +17,16 @@ module RDF::Normalize
|
|
17
17
|
# @option options [Integer] :max_calls (40)
|
18
18
|
# Maximum number of calls allowed for recursive blank node labeling,
|
19
19
|
# as a multiple of the total number of blank nodes in the dataset.
|
20
|
+
# @option options [:MD5, :SHA1, :SHA2, :SHA256, :SHA384, :SHA512] :hash_algorithm (:SHA256)
|
21
|
+
# See [Digest Algorithms](https://github.com/ruby/digest#digest-algorithms)
|
20
22
|
# @return [RDF::Enumerable]
|
21
23
|
# @raise [RuntimeError] if the maximum number of levels of recursion is exceeded.
|
22
24
|
def initialize(enumerable, **options)
|
23
25
|
@dataset, @options = enumerable, options
|
26
|
+
@options[:hash_algorithm] ||= :SHA256
|
27
|
+
unless %i{MD5 SHA1 SHA2 SHA256 SHA384 SHA512}.include?(@options[:hash_algorithm])
|
28
|
+
raise UnknownHashAlgorithm, "UnknownHashAlgorithm: #{@options[:hash_algorithm].inspect}. Use one of MD5, SHA1, SHA2, SHA256, SHA384, or SHA512"
|
29
|
+
end
|
24
30
|
end
|
25
31
|
|
26
32
|
# Yields each normalized statement
|
@@ -158,6 +164,7 @@ module RDF::Normalize
|
|
158
164
|
include RDF::Util::Logger
|
159
165
|
|
160
166
|
attr_accessor :bnode_to_statements
|
167
|
+
attr_accessor :hash_algorithm
|
161
168
|
attr_accessor :hash_to_bnodes
|
162
169
|
attr_accessor :canonical_issuer
|
163
170
|
attr_accessor :max_calls
|
@@ -165,6 +172,7 @@ module RDF::Normalize
|
|
165
172
|
|
166
173
|
def initialize(**options)
|
167
174
|
@options = options
|
175
|
+
@hash_algorithm = Digest.const_get(options.fetch(:hash_algorithm, :SHA256))
|
168
176
|
@bnode_to_statements, @hash_to_bnodes, @canonical_issuer = {}, {}, IdentifierIssuer.new("c14n")
|
169
177
|
@max_calls, @total_calls = nil, 0
|
170
178
|
end
|
@@ -233,7 +241,7 @@ module RDF::Normalize
|
|
233
241
|
# @param [RDF::Node] node
|
234
242
|
# @param [IdentifierIssuer] issuer
|
235
243
|
# @return [Array<String,IdentifierIssuer>] the Hash and issuer
|
236
|
-
# @raise [
|
244
|
+
# @raise [MaxCallsExceeded] If total number of calls has exceeded `max_calls` times the number of blank nodes in the dataset.
|
237
245
|
def hash_n_degree_quads(node, issuer)
|
238
246
|
log_debug("hndq:")
|
239
247
|
log_debug(" log point", "Hash N-Degree Quads function (4.9.3).")
|
@@ -241,7 +249,7 @@ module RDF::Normalize
|
|
241
249
|
log_debug(" issuer") {issuer.inspect}
|
242
250
|
|
243
251
|
if max_calls && total_calls >= max_calls
|
244
|
-
raise "Exceeded maximum number of calls (#{total_calls}) allowed to hash_n_degree_quads"
|
252
|
+
raise MaxCallsExceeded, "Exceeded maximum number of calls (#{total_calls}) allowed to hash_n_degree_quads"
|
245
253
|
end
|
246
254
|
@total_calls += 1
|
247
255
|
|
@@ -367,7 +375,7 @@ module RDF::Normalize
|
|
367
375
|
protected
|
368
376
|
|
369
377
|
def hexdigest(val)
|
370
|
-
|
378
|
+
hash_algorithm.hexdigest(val)
|
371
379
|
end
|
372
380
|
|
373
381
|
# Group adjacent bnodes by hash
|
data/lib/rdf/normalize.rb
CHANGED
@@ -3,7 +3,7 @@ require 'digest'
|
|
3
3
|
|
4
4
|
module RDF
|
5
5
|
##
|
6
|
-
# **`RDF::Normalize`** is an RDF Graph
|
6
|
+
# **`RDF::Normalize`** is an RDF Graph canonicalization plugin for RDF.rb.
|
7
7
|
#
|
8
8
|
# @example Requiring the `RDF::Normalize` module
|
9
9
|
# require 'rdf/normalize'
|
@@ -18,7 +18,7 @@ module RDF
|
|
18
18
|
# @example Returning normalized N-Quads
|
19
19
|
#
|
20
20
|
# g = RDF::Graph.load("etc/doap.ttl")
|
21
|
-
# g.dump(:normalize)
|
21
|
+
# g.dump(:normalize) # or :canonicalize
|
22
22
|
#
|
23
23
|
# @example Writing a repository as normalized N-Quads
|
24
24
|
#
|
@@ -66,5 +66,30 @@ module RDF
|
|
66
66
|
end
|
67
67
|
module_function :new
|
68
68
|
|
69
|
+
class MaxCallsExceeded < RuntimeError; end
|
70
|
+
class UnknownHashAlgorithm < RuntimeError; end
|
71
|
+
end
|
72
|
+
|
73
|
+
module Canonicalize
|
74
|
+
# RDF::Canonicalize extends RDF::Normalize.
|
75
|
+
include Normalize
|
76
|
+
end
|
77
|
+
|
78
|
+
# Change RDF::Enumerable#canonicalize
|
79
|
+
module Enumerable
|
80
|
+
##
|
81
|
+
# Returns the Enumerable produced by RDF::Normalize.
|
82
|
+
# This also canonicalizes URIs and Literals.
|
83
|
+
#
|
84
|
+
# @return [RDF::Enumerable]
|
85
|
+
remove_method :canonicalize if method_defined? :canonicalize
|
86
|
+
def canonicalize
|
87
|
+
# Ensure that statements are queryable, countable and enumerable
|
88
|
+
this = self
|
89
|
+
enum = Enumerator.new do |yielder|
|
90
|
+
this.send(:each_statement) {|y| yielder << y.canonicalize}
|
91
|
+
end
|
92
|
+
RDF::Normalize.new(enum)
|
93
|
+
end
|
69
94
|
end
|
70
95
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: rdf-normalize
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.7.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Gregg Kellogg
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2023-
|
11
|
+
date: 2023-09-01 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rdf
|
@@ -16,70 +16,70 @@ dependencies:
|
|
16
16
|
requirements:
|
17
17
|
- - "~>"
|
18
18
|
- !ruby/object:Gem::Version
|
19
|
-
version: '3.
|
19
|
+
version: '3.3'
|
20
20
|
type: :runtime
|
21
21
|
prerelease: false
|
22
22
|
version_requirements: !ruby/object:Gem::Requirement
|
23
23
|
requirements:
|
24
24
|
- - "~>"
|
25
25
|
- !ruby/object:Gem::Version
|
26
|
-
version: '3.
|
26
|
+
version: '3.3'
|
27
27
|
- !ruby/object:Gem::Dependency
|
28
28
|
name: rdf-spec
|
29
29
|
requirement: !ruby/object:Gem::Requirement
|
30
30
|
requirements:
|
31
31
|
- - "~>"
|
32
32
|
- !ruby/object:Gem::Version
|
33
|
-
version: '3.
|
33
|
+
version: '3.3'
|
34
34
|
type: :development
|
35
35
|
prerelease: false
|
36
36
|
version_requirements: !ruby/object:Gem::Requirement
|
37
37
|
requirements:
|
38
38
|
- - "~>"
|
39
39
|
- !ruby/object:Gem::Version
|
40
|
-
version: '3.
|
40
|
+
version: '3.3'
|
41
41
|
- !ruby/object:Gem::Dependency
|
42
42
|
name: rspec
|
43
43
|
requirement: !ruby/object:Gem::Requirement
|
44
44
|
requirements:
|
45
45
|
- - "~>"
|
46
46
|
- !ruby/object:Gem::Version
|
47
|
-
version: '3.
|
47
|
+
version: '3.12'
|
48
48
|
type: :development
|
49
49
|
prerelease: false
|
50
50
|
version_requirements: !ruby/object:Gem::Requirement
|
51
51
|
requirements:
|
52
52
|
- - "~>"
|
53
53
|
- !ruby/object:Gem::Version
|
54
|
-
version: '3.
|
54
|
+
version: '3.12'
|
55
55
|
- !ruby/object:Gem::Dependency
|
56
56
|
name: json-ld
|
57
57
|
requirement: !ruby/object:Gem::Requirement
|
58
58
|
requirements:
|
59
59
|
- - "~>"
|
60
60
|
- !ruby/object:Gem::Version
|
61
|
-
version: '3.
|
61
|
+
version: '3.3'
|
62
62
|
type: :development
|
63
63
|
prerelease: false
|
64
64
|
version_requirements: !ruby/object:Gem::Requirement
|
65
65
|
requirements:
|
66
66
|
- - "~>"
|
67
67
|
- !ruby/object:Gem::Version
|
68
|
-
version: '3.
|
68
|
+
version: '3.3'
|
69
69
|
- !ruby/object:Gem::Dependency
|
70
70
|
name: rdf-trig
|
71
71
|
requirement: !ruby/object:Gem::Requirement
|
72
72
|
requirements:
|
73
73
|
- - "~>"
|
74
74
|
- !ruby/object:Gem::Version
|
75
|
-
version: '3.
|
75
|
+
version: '3.3'
|
76
76
|
type: :development
|
77
77
|
prerelease: false
|
78
78
|
version_requirements: !ruby/object:Gem::Requirement
|
79
79
|
requirements:
|
80
80
|
- - "~>"
|
81
81
|
- !ruby/object:Gem::Version
|
82
|
-
version: '3.
|
82
|
+
version: '3.3'
|
83
83
|
- !ruby/object:Gem::Dependency
|
84
84
|
name: yard
|
85
85
|
requirement: !ruby/object:Gem::Requirement
|
@@ -129,14 +129,14 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
129
129
|
requirements:
|
130
130
|
- - ">="
|
131
131
|
- !ruby/object:Gem::Version
|
132
|
-
version: '
|
132
|
+
version: '3.0'
|
133
133
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
134
134
|
requirements:
|
135
135
|
- - ">="
|
136
136
|
- !ruby/object:Gem::Version
|
137
137
|
version: '0'
|
138
138
|
requirements: []
|
139
|
-
rubygems_version: 3.4.
|
139
|
+
rubygems_version: 3.4.19
|
140
140
|
signing_key:
|
141
141
|
specification_version: 4
|
142
142
|
summary: RDF Graph normalizer for Ruby.
|