rdf-normalize 0.1.0 → 0.3.0.beta1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: d7076dcfeccdbfc0b35ec046d0b338a6ad41d776
4
- data.tar.gz: cd5f278797b575a3a6cced04890b9014c2350f42
3
+ metadata.gz: 1135be30a9a3c1f15e14fa57ff0dce5488a53734
4
+ data.tar.gz: fd85ba1edde8b8d03a297d65d4a69dae7da3f308
5
5
  SHA512:
6
- metadata.gz: 2510cec72f19af6eef55678382f688a5948b59fad4c6be18465c53a66d16b25bb543dadbd5a5148675d291afac133c8cc7d5399650ad9043b858c1a1f6165291
7
- data.tar.gz: f785bd00b4abacf7da181daae96e2101aa449b19791a08742654451cf9a3b25abdf368dd77d99a86c4f74a49084b2d9af6464ea30bbed45589f87713e899ee63
6
+ metadata.gz: c6de1463ff4f57d54e937e24ba59134a8c8f4cf10f228e9835d032118a3b6297a40d485075c35e679db3eae176340df0492af88efca3613d0bb00d0f413bb9f1
7
+ data.tar.gz: 979b2463ccc9859cd752548a9f9532f1c16206205c5c54554408099f2167427a97dc3f4b9b6525fe3ee096dacecb916ec78d6f4b32e42189a426cedb1547fc5d
data/README.md CHANGED
@@ -15,12 +15,12 @@ to serialize normalized statements.
15
15
  Algorithms implemented:
16
16
 
17
17
  * [URGNA2012](http://json-ld.github.io/normalization/spec/index.html#dfn-urgna2012)
18
- * [URDNA2014](http://json-ld.github.io/normalization/spec/index.html#dfn-urdna2015)
18
+ * [URDNA2015](http://json-ld.github.io/normalization/spec/index.html#dfn-urdna2015)
19
19
 
20
20
  Install with `gem install rdf-normalize`
21
21
 
22
22
  * 100% free and unencumbered [public domain](http://unlicense.org/) software.
23
- * Compatible with Ruby >= 1.9.3.
23
+ * Compatible with Ruby >= 2.0.
24
24
 
25
25
  ## Usage
26
26
 
@@ -38,8 +38,8 @@ Full documentation available on [Rubydoc.info][Normalize doc]
38
38
 
39
39
  ## Dependencies
40
40
 
41
- * [Ruby](http://ruby-lang.org/) (>= 1.9.2)
42
- * [RDF.rb](http://rubygems.org/gems/rdf) (~> 1.1)
41
+ * [Ruby](http://ruby-lang.org/) (>= 2.0)
42
+ * [RDF.rb](http://rubygems.org/gems/rdf) (~> 2.0)
43
43
 
44
44
  ## Installation
45
45
 
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.1.0
1
+ 0.3.0.beta1
@@ -28,7 +28,6 @@ module RDF
28
28
  # @author [Gregg Kellogg](http://greggkellogg.net/)
29
29
  module Normalize
30
30
  require 'rdf/normalize/format'
31
- require 'rdf/normalize/utils'
32
31
  autoload :Base, 'rdf/normalize/base'
33
32
  autoload :Carroll2001,'rdf/normalize/carroll2001'
34
33
  autoload :URGNA2012, 'rdf/normalize/urgna2012'
@@ -1,8 +1,8 @@
1
1
  module RDF::Normalize
2
2
  class URDNA2015
3
3
  include RDF::Enumerable
4
+ include RDF::Util::Logger
4
5
  include Base
5
- include Utils
6
6
 
7
7
  ##
8
8
  # Create an enumerable with grounded nodes
@@ -35,8 +35,8 @@ module RDF::Normalize
35
35
 
36
36
  # Calculate hashes for first degree nodes
37
37
  non_normalized_identifiers.each do |node|
38
- hash = depth {ns.hash_first_degree_quads(node)}
39
- debug("1deg") {"hash: #{hash}"}
38
+ hash = log_depth {ns.hash_first_degree_quads(node)}
39
+ log_debug("1deg") {"hash: #{hash}"}
40
40
  ns.add_bnode_hash(node, hash)
41
41
  end
42
42
 
@@ -46,7 +46,7 @@ module RDF::Normalize
46
46
  next if identifier_list.length > 1
47
47
  node = identifier_list.first
48
48
  id = ns.canonical_issuer.issue_identifier(node)
49
- debug("single node") {"node: #{node.to_ntriples}, hash: #{hash}, id: #{id}"}
49
+ log_debug("single node") {"node: #{node.to_ntriples}, hash: #{hash}, id: #{id}"}
50
50
  non_normalized_identifiers -= identifier_list
51
51
  ns.hash_to_bnodes.delete(hash)
52
52
  simple = true
@@ -57,7 +57,7 @@ module RDF::Normalize
57
57
  ns.hash_to_bnodes.keys.sort.each do |hash|
58
58
  identifier_list = ns.hash_to_bnodes[hash]
59
59
 
60
- debug("multiple nodes") {"node: #{identifier_list.map(&:to_ntriples).join(",")}, hash: #{hash}"}
60
+ log_debug("multiple nodes") {"node: #{identifier_list.map(&:to_ntriples).join(",")}, hash: #{hash}"}
61
61
  hash_path_list = []
62
62
 
63
63
  # Create a hash_path_list for all bnodes using a temporary identifier used to create canonical replacements
@@ -65,15 +65,15 @@ module RDF::Normalize
65
65
  next if ns.canonical_issuer.issued.include?(identifier)
66
66
  temporary_issuer = IdentifierIssuer.new("_:b")
67
67
  temporary_issuer.issue_identifier(identifier)
68
- hash_path_list << depth {ns.hash_n_degree_quads(identifier, temporary_issuer)}
68
+ hash_path_list << log_depth {ns.hash_n_degree_quads(identifier, temporary_issuer)}
69
69
  end
70
- debug("->") {"hash_path_list: #{hash_path_list.map(&:first).inspect}"}
70
+ log_debug("->") {"hash_path_list: #{hash_path_list.map(&:first).inspect}"}
71
71
 
72
72
  # Create canonical replacements for nodes
73
73
  hash_path_list.sort_by(&:first).map(&:last).each do |issuer|
74
74
  issuer.issued.each do |node|
75
75
  id = ns.canonical_issuer.issue_identifier(node)
76
- debug("-->") {"node: #{node.to_ntriples}, id: #{id}"}
76
+ log_debug("-->") {"node: #{node.to_ntriples}, id: #{id}"}
77
77
  end
78
78
  end
79
79
  end
@@ -94,7 +94,7 @@ module RDF::Normalize
94
94
  private
95
95
 
96
96
  class NormalizationState
97
- include Utils
97
+ include RDF::Util::Logger
98
98
 
99
99
  attr_accessor :bnode_to_statements
100
100
  attr_accessor :hash_to_bnodes
@@ -116,7 +116,7 @@ module RDF::Normalize
116
116
  end
117
117
 
118
118
  # @param [RDF::Node] node
119
- # @return [String] the SHA1 hexdigest hash of statements using this node, with replacements
119
+ # @return [String] the SHA256 hexdigest hash of statements using this node, with replacements
120
120
  def hash_first_degree_quads(node)
121
121
  quads = bnode_to_statements[node].
122
122
  map do |statement|
@@ -130,7 +130,7 @@ module RDF::Normalize
130
130
  RDF::NQuads::Writer.serialize(RDF::Statement.from(quad))
131
131
  end
132
132
 
133
- debug("1deg") {"node: #{node}, quads: #{quads}"}
133
+ log_debug("1deg") {"node: #{node}, quads: #{quads}"}
134
134
  hexdigest(quads.sort.join)
135
135
  end
136
136
 
@@ -138,7 +138,7 @@ module RDF::Normalize
138
138
  # @param [RDF::Statement] statement
139
139
  # @param [IdentifierIssuer] issuer
140
140
  # @param [Symbol] position one of :s, :o, or :g
141
- # @return [String] the SHA1 hexdigest hash
141
+ # @return [String] the SHA256 hexdigest hash
142
142
  def hash_related_node(related, statement, issuer, position)
143
143
  identifier = canonical_issuer.identifier(related) ||
144
144
  issuer.identifier(related) ||
@@ -146,7 +146,7 @@ module RDF::Normalize
146
146
  input = position.to_s
147
147
  input << statement.predicate.to_ntriples unless position == :g
148
148
  input << identifier
149
- debug("hrel") {"input: #{input.inspect}, hash: #{hexdigest(input)}"}
149
+ log_debug("hrel") {"input: #{input.inspect}, hash: #{hexdigest(input)}"}
150
150
  hexdigest(input)
151
151
  end
152
152
 
@@ -154,7 +154,7 @@ module RDF::Normalize
154
154
  # @param [IdentifierIssuer] issuer
155
155
  # @return [Array<String,IdentifierIssuer>] the Hash and issuer
156
156
  def hash_n_degree_quads(identifier, issuer)
157
- debug("ndeg") {"identifier: #{identifier.to_ntriples}"}
157
+ log_debug("ndeg") {"identifier: #{identifier.to_ntriples}"}
158
158
 
159
159
  # hash to related blank nodes map
160
160
  map = {}
@@ -165,8 +165,8 @@ module RDF::Normalize
165
165
 
166
166
  data_to_hash = ""
167
167
 
168
- debug("ndeg") {"map: #{map.map {|h,l| "#{h}: #{l.map(&:to_ntriples)}"}.join('; ')}"}
169
- depth do
168
+ log_debug("ndeg") {"map: #{map.map {|h,l| "#{h}: #{l.map(&:to_ntriples)}"}.join('; ')}"}
169
+ log_depth do
170
170
  map.keys.sort.each do |hash|
171
171
  list = map[hash]
172
172
  # Iterate over related nodes
@@ -174,7 +174,7 @@ module RDF::Normalize
174
174
  data_to_hash += hash
175
175
 
176
176
  list.permutation do |permutation|
177
- debug("ndeg") {"perm: #{permutation.map(&:to_ntriples).join(",")}"}
177
+ log_debug("ndeg") {"perm: #{permutation.map(&:to_ntriples).join(",")}"}
178
178
  issuer_copy, path, recursion_list = issuer.dup, "", []
179
179
 
180
180
  permutation.each do |related|
@@ -188,10 +188,10 @@ module RDF::Normalize
188
188
  # Skip to the next permutation if chosen path isn't empty and the path is greater than the chosen path
189
189
  break if !chosen_path.empty? && path.length >= chosen_path.length
190
190
  end
191
- debug("ndeg") {"hash: #{hash}, path: #{path}, recursion: #{recursion_list.map(&:to_ntriples)}"}
191
+ log_debug("ndeg") {"hash: #{hash}, path: #{path}, recursion: #{recursion_list.map(&:to_ntriples)}"}
192
192
 
193
193
  recursion_list.each do |related|
194
- result = depth {hash_n_degree_quads(related, issuer_copy)}
194
+ result = log_depth {hash_n_degree_quads(related, issuer_copy)}
195
195
  path << issuer_copy.issue_identifier(related)
196
196
  path << "<#{result.first}>"
197
197
  issuer_copy = result.last
@@ -208,15 +208,14 @@ module RDF::Normalize
208
208
  end
209
209
  end
210
210
 
211
- debug("ndeg") {"datatohash: #{data_to_hash.inspect}, hash: #{hexdigest(data_to_hash)}"}
211
+ log_debug("ndeg") {"datatohash: #{data_to_hash.inspect}, hash: #{hexdigest(data_to_hash)}"}
212
212
  return [hexdigest(data_to_hash), issuer]
213
213
  end
214
214
 
215
215
  protected
216
216
 
217
- # FIXME: should be SHA-256.
218
217
  def hexdigest(val)
219
- Digest::SHA1.hexdigest(val)
218
+ Digest::SHA256.hexdigest(val)
220
219
  end
221
220
 
222
221
  # Group adjacent bnodes by hash
@@ -224,7 +223,7 @@ module RDF::Normalize
224
223
  statement.to_hash(:s, :p, :o, :g).each do |pos, term|
225
224
  next if !term.is_a?(RDF::Node) || term == identifier
226
225
 
227
- hash = depth {hash_related_node(term, statement, issuer, pos)}
226
+ hash = log_depth {hash_related_node(term, statement, issuer, pos)}
228
227
  map[hash] ||= []
229
228
  map[hash] << term unless map[hash].include?(term)
230
229
  end
@@ -26,18 +26,18 @@ module RDF::Normalize
26
26
  input = position.to_s
27
27
  input << statement.predicate.to_s
28
28
  input << identifier
29
- debug("hrel") {"input: #{input.inspect}, hash: #{hexdigest(input)}"}
29
+ log_debug("hrel") {"input: #{input.inspect}, hash: #{hexdigest(input)}"}
30
30
  hexdigest(input)
31
31
  end
32
32
 
33
33
  # In URGNA2012, the position parameter passed to the Hash Related Blank Node algorithm was instead modeled as a direction parameter, where it could have the value p, for property, when the related blank node was a `subject` and the value r, for reverse or reference, when the related blank node was an `object`. Since URGNA2012 only normalized graphs, not datasets, there was no use of the `graph` position.
34
34
  def hash_related_statement(identifier, statement, issuer, map)
35
35
  if statement.subject.node? && statement.subject != identifier
36
- hash = depth {hash_related_node(statement.subject, statement, issuer, :p)}
36
+ hash = log_depth {hash_related_node(statement.subject, statement, issuer, :p)}
37
37
  map[hash] ||= []
38
38
  map[hash] << statement.subject unless map[hash].include?(statement.subject)
39
39
  elsif statement.object.node? && statement.object != identifier
40
- hash = depth {hash_related_node(statement.object, statement, issuer, :r)}
40
+ hash = log_depth {hash_related_node(statement.object, statement, issuer, :r)}
41
41
  map[hash] ||= []
42
42
  map[hash] << statement.object unless map[hash].include?(statement.object)
43
43
  end
@@ -25,7 +25,6 @@ module RDF::Normalize
25
25
  # @yieldparam [RDF::Writer] writer
26
26
  def initialize(output = $stdout, options = {}, &block)
27
27
  super do
28
- @options[:depth] ||= 0
29
28
  @repo = RDF::Repository.new
30
29
  if block_given?
31
30
  case block.arity
@@ -36,10 +35,17 @@ module RDF::Normalize
36
35
  end
37
36
  end
38
37
 
38
+
39
39
  ##
40
- # Defer writing to epilogue
41
- def write_statement(statement)
42
- self
40
+ # Adds statements to the repository to be serialized in epilogue.
41
+ #
42
+ # @param [RDF::Resource] subject
43
+ # @param [RDF::URI] predicate
44
+ # @param [RDF::Value] object
45
+ # @param [RDF::Resource] graph_name
46
+ # @return [void]
47
+ def write_quad(subject, predicate, object, graph_name)
48
+ @repo.insert(RDF::Statement(subject, predicate, object, graph_name: graph_name))
43
49
  end
44
50
 
45
51
  ##
@@ -55,18 +61,11 @@ module RDF::Normalize
55
61
  each do |line|
56
62
  puts line
57
63
  end
64
+ super
58
65
  end
59
66
 
60
67
  protected
61
68
 
62
- ##
63
- # Adds a statement to be serialized
64
- # @param [RDF::Statement] statement
65
- # @return [void]
66
- def insert_statement(statement)
67
- @repo.insert(statement)
68
- end
69
-
70
69
  ##
71
70
  # Insert an Enumerable
72
71
  #
metadata CHANGED
@@ -1,43 +1,55 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rdf-normalize
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.0
4
+ version: 0.3.0.beta1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Gregg Kellogg
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2015-05-20 00:00:00.000000000 Z
11
+ date: 2016-02-02 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rdf
15
15
  requirement: !ruby/object:Gem::Requirement
16
16
  requirements:
17
- - - "~>"
17
+ - - ">="
18
+ - !ruby/object:Gem::Version
19
+ version: 2.0.0.beta
20
+ - - "<"
18
21
  - !ruby/object:Gem::Version
19
- version: '1.1'
22
+ version: '3'
20
23
  type: :runtime
21
24
  prerelease: false
22
25
  version_requirements: !ruby/object:Gem::Requirement
23
26
  requirements:
24
- - - "~>"
27
+ - - ">="
25
28
  - !ruby/object:Gem::Version
26
- version: '1.1'
29
+ version: 2.0.0.beta
30
+ - - "<"
31
+ - !ruby/object:Gem::Version
32
+ version: '3'
27
33
  - !ruby/object:Gem::Dependency
28
34
  name: rdf-spec
29
35
  requirement: !ruby/object:Gem::Requirement
30
36
  requirements:
31
- - - "~>"
37
+ - - ">="
32
38
  - !ruby/object:Gem::Version
33
- version: '1.1'
39
+ version: 2.0.0.beta
40
+ - - "<"
41
+ - !ruby/object:Gem::Version
42
+ version: '3'
34
43
  type: :development
35
44
  prerelease: false
36
45
  version_requirements: !ruby/object:Gem::Requirement
37
46
  requirements:
38
- - - "~>"
47
+ - - ">="
48
+ - !ruby/object:Gem::Version
49
+ version: 2.0.0.beta
50
+ - - "<"
39
51
  - !ruby/object:Gem::Version
40
- version: '1.1'
52
+ version: '3'
41
53
  - !ruby/object:Gem::Dependency
42
54
  name: open-uri-cached
43
55
  requirement: !ruby/object:Gem::Requirement
@@ -90,16 +102,22 @@ dependencies:
90
102
  name: json-ld
91
103
  requirement: !ruby/object:Gem::Requirement
92
104
  requirements:
93
- - - "~>"
105
+ - - ">="
106
+ - !ruby/object:Gem::Version
107
+ version: 2.0.0.beta
108
+ - - "<"
94
109
  - !ruby/object:Gem::Version
95
- version: '1.1'
110
+ version: '3'
96
111
  type: :development
97
112
  prerelease: false
98
113
  version_requirements: !ruby/object:Gem::Requirement
99
114
  requirements:
100
- - - "~>"
115
+ - - ">="
101
116
  - !ruby/object:Gem::Version
102
- version: '1.1'
117
+ version: 2.0.0.beta
118
+ - - "<"
119
+ - !ruby/object:Gem::Version
120
+ version: '3'
103
121
  - !ruby/object:Gem::Dependency
104
122
  name: yard
105
123
  requirement: !ruby/object:Gem::Requirement
@@ -130,11 +148,10 @@ files:
130
148
  - lib/rdf/normalize/format.rb
131
149
  - lib/rdf/normalize/urdna2015.rb
132
150
  - lib/rdf/normalize/urgna2012.rb
133
- - lib/rdf/normalize/utils.rb
134
151
  - lib/rdf/normalize/writer.rb
135
152
  homepage: http://github.com/gkellogg/rdf-normalize
136
153
  licenses:
137
- - Public Domain
154
+ - Unlicense
138
155
  metadata: {}
139
156
  post_install_message:
140
157
  rdoc_options: []
@@ -144,15 +161,15 @@ required_ruby_version: !ruby/object:Gem::Requirement
144
161
  requirements:
145
162
  - - ">="
146
163
  - !ruby/object:Gem::Version
147
- version: 1.9.2
164
+ version: 2.0.0
148
165
  required_rubygems_version: !ruby/object:Gem::Requirement
149
166
  requirements:
150
- - - ">="
167
+ - - ">"
151
168
  - !ruby/object:Gem::Version
152
- version: '0'
169
+ version: 1.3.1
153
170
  requirements: []
154
171
  rubyforge_project: rdf-normalize
155
- rubygems_version: 2.4.7
172
+ rubygems_version: 2.5.1
156
173
  signing_key:
157
174
  specification_version: 4
158
175
  summary: RDF Graph normalizer for Ruby.
@@ -1,33 +0,0 @@
1
- module RDF::Normalize
2
- module Utils
3
- # Add debug event to debug array, if specified
4
- #
5
- # param [String] message
6
- # yieldreturn [String] appended to message, to allow for lazy-evaluation of message
7
- def debug(*args)
8
- options = args.last.is_a?(Hash) ? args.pop : {}
9
- return unless options[:debug] || @options[:debug]
10
- depth = options[:depth] || @options[:depth]
11
- d_str = depth > 100 ? ' ' * 100 + '+' : ' ' * depth
12
- list = args
13
- list << yield if block_given?
14
- message = d_str + (list.empty? ? "" : list.join(": "))
15
- options[:debug] << message if options[:debug].is_a?(Array)
16
- @options[:debug] << message if @options[:debug].is_a?(Array)
17
- $stderr.puts(message) if @options[:debug] == TrueClass
18
- end
19
- module_function :debug
20
-
21
- # Increase depth around a method invocation
22
- # @yield
23
- # Yields with no arguments
24
- # @yieldreturn [Object] returns the result of yielding
25
- # @return [Object]
26
- def depth
27
- @options[:depth] += 1
28
- ret = yield
29
- @options[:depth] -= 1
30
- ret
31
- end
32
- end
33
- end