tao_rdfizer 0.10 → 0.11.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/bin/tao_rdfizer +10 -1
- data/lib/tao_rdfizer/tao_rdfizer.rb +54 -12
- metadata +3 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: ef68cf0cfd37026eda3abfb19f294d866c343294c1a9cc8f4ed2aa9af1f1443c
|
4
|
+
data.tar.gz: fdde442909c60d0dbfe81ea445b049a52829752d01b06f5f599786071b09b62c
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: a0219d82d900259fd8e7dab5ea5382bc06bb504ea21207302b21b59047f33ba899491aa53043a9456af1c9e1693d9a957223a9cad31d8a2e21f4df60aebdbf4e
|
7
|
+
data.tar.gz: 5250d9a3daf7716e5c8f2e01fe2560589862fb3b7cdf7a03c067f07e47930f91b6912b7b9316d8de652fcf983f677e1ab0d9de16954f2fdf3880478e7f50344a
|
data/bin/tao_rdfizer
CHANGED
@@ -3,6 +3,7 @@ require 'tao_rdfizer'
|
|
3
3
|
require 'json'
|
4
4
|
|
5
5
|
mode = nil
|
6
|
+
options = {}
|
6
7
|
|
7
8
|
## command line option processing
|
8
9
|
require 'optparse'
|
@@ -17,6 +18,14 @@ optparse = OptionParser.new do |opts|
|
|
17
18
|
mode = :spans
|
18
19
|
end
|
19
20
|
|
21
|
+
opts.on('-x', '--x-prefixes', 'without prefixes.') do
|
22
|
+
options[:with_prefixes] = false
|
23
|
+
end
|
24
|
+
|
25
|
+
opts.on('-o', '--only-prefixes', 'only prefixes.') do
|
26
|
+
options[:only_prefixes] = true
|
27
|
+
end
|
28
|
+
|
20
29
|
opts.on('-h', '--help', 'displays this screen.') do
|
21
30
|
puts opts
|
22
31
|
exit
|
@@ -34,7 +43,7 @@ begin
|
|
34
43
|
annotations = JSON.parse File.read(ARGV[0]), :symbolize_names => true
|
35
44
|
annotations = [annotations] unless annotations.class == Array
|
36
45
|
rdfizer = TAO::RDFizer.new(mode)
|
37
|
-
puts rdfizer.rdfize(annotations)
|
46
|
+
puts rdfizer.rdfize(annotations, options)
|
38
47
|
rescue ArgumentError, IOError => e
|
39
48
|
puts e.message
|
40
49
|
end
|
@@ -14,18 +14,45 @@ class TAO::RDFizer
|
|
14
14
|
else
|
15
15
|
ERB_ANNOTATIONS_TTL
|
16
16
|
end
|
17
|
-
|
18
17
|
@tao_ttl_erb = ERB.new(template, nil, '-')
|
19
18
|
@prefix_ttl_erb = ERB.new(ERB_PREFIXES_TTL, nil, '-')
|
20
19
|
end
|
21
20
|
|
22
|
-
def rdfize(annotations_col)
|
21
|
+
def rdfize(annotations_col, options = nil)
|
22
|
+
options ||= {}
|
23
|
+
only_prefixes = options.has_key?(:only_prefixes) ? options[:only_prefixes] == true : false
|
24
|
+
with_prefixes = options.has_key?(:with_prefixes) ? options[:with_prefixes] == true : true
|
25
|
+
|
26
|
+
# check the format
|
27
|
+
annotations_col.each do |annotations|
|
28
|
+
raise "'target' is missing" unless annotations.has_key? :target
|
29
|
+
end
|
30
|
+
|
23
31
|
# namespaces
|
24
32
|
namespaces = {}
|
25
33
|
|
26
34
|
anns = annotations_col.first
|
27
35
|
anns[:namespaces].each {|n| namespaces[n[:prefix]] = n[:uri]} unless anns[:namespaces].nil?
|
28
36
|
|
37
|
+
prefixes_ttl = @prefix_ttl_erb.result_with_hash(namespaces:namespaces) if only_prefixes || with_prefixes
|
38
|
+
|
39
|
+
if only_prefixes
|
40
|
+
prefixes_ttl
|
41
|
+
else
|
42
|
+
annotations_ttl = get_annotations_ttl(annotations_col, namespaces)
|
43
|
+
if with_prefixes
|
44
|
+
prefixes_ttl + annotations_ttl
|
45
|
+
else
|
46
|
+
annotations_ttl
|
47
|
+
end
|
48
|
+
end
|
49
|
+
end
|
50
|
+
|
51
|
+
private
|
52
|
+
|
53
|
+
def get_annotations_ttl(annotations_col, namespaces)
|
54
|
+
anns = annotations_col.first
|
55
|
+
|
29
56
|
unless @mode ==:spans
|
30
57
|
raise ArgumentError, "A project name has to be specified." unless anns.has_key?(:project)
|
31
58
|
prefix_for_this = anns[:project].downcase.gsub(/ /, '_')
|
@@ -53,14 +80,14 @@ class TAO::RDFizer
|
|
53
80
|
_relations = annotations[:relations] || []
|
54
81
|
if @mode == :spans && annotations.has_key?(:tracks)
|
55
82
|
annotations[:tracks].each do |track|
|
56
|
-
_denotations += track[:denotations]
|
57
|
-
_attributes += track[:attributes]
|
58
|
-
_relations += track[:relations]
|
83
|
+
_denotations += track[:denotations] if track.has_key? :denotations
|
84
|
+
_attributes += track[:attributes] if track.has_key? :attributes
|
85
|
+
_relations += track[:relations] if track.has_key? :relations
|
59
86
|
end
|
60
87
|
end
|
61
88
|
|
62
89
|
begin
|
63
|
-
|
90
|
+
unless @mode == :span
|
64
91
|
# index attributes
|
65
92
|
attributesh = _attributes.inject({}) do |h, a|
|
66
93
|
if a[:pred].end_with?('_id')
|
@@ -76,7 +103,7 @@ class TAO::RDFizer
|
|
76
103
|
span_uri = "<#{text_uri}/spans/#{d[:span][:begin]}-#{d[:span][:end]}>"
|
77
104
|
d[:span_uri] = span_uri
|
78
105
|
d[:obj_uri] = "#{prefix_for_this}:#{text_id}-#{d[:id]}"
|
79
|
-
class_uris = attributesh[d[:id]].push(d[:obj])
|
106
|
+
class_uris = (attributesh[d[:id]] || []).push(d[:obj])
|
80
107
|
d[:class_uris] = class_uris.map{|uri| find_uri(uri, namespaces, prefix_for_this)}
|
81
108
|
rescue ArgumentError => e
|
82
109
|
raise ArgumentError, "[#{sourcedb}-#{sourceid}-#{d[:id]}] " + e.message
|
@@ -167,11 +194,9 @@ class TAO::RDFizer
|
|
167
194
|
spans += _spans unless @mode == :annotations
|
168
195
|
end
|
169
196
|
|
170
|
-
|
197
|
+
@tao_ttl_erb.result(binding)
|
171
198
|
end
|
172
199
|
|
173
|
-
private
|
174
|
-
|
175
200
|
def include_parent?(spans, span)
|
176
201
|
# spans.each{|s| return true if (s[:begin] <= span[:begin] && s[:end] > span[:end]) || (s[:begin] < span[:begin] && s[:end] >= span[:end])}
|
177
202
|
spans.each{|s| return true if s[:begin] <= span[:begin] && s[:end] >= span[:end]}
|
@@ -187,7 +212,10 @@ class TAO::RDFizer
|
|
187
212
|
end
|
188
213
|
|
189
214
|
def find_uri (label, namespaces, prefix_for_this)
|
190
|
-
|
215
|
+
if label.match(/\s/)
|
216
|
+
# raise ArgumentError, "A label including a whitespace character found: #{label}."
|
217
|
+
label.gsub(/\s/, '_')
|
218
|
+
end
|
191
219
|
delimiter_position = label.index(':')
|
192
220
|
if !delimiter_position.nil? && namespaces.keys.include?(label[0...delimiter_position])
|
193
221
|
label.gsub('(', '\(').gsub(')', '\)') # brackets have to be escaped
|
@@ -203,6 +231,18 @@ class TAO::RDFizer
|
|
203
231
|
end
|
204
232
|
end
|
205
233
|
|
234
|
+
def rdf_literal_escape(string)
|
235
|
+
string.gsub('\\', '\\\\').
|
236
|
+
gsub("\t", '\\t').
|
237
|
+
gsub("\b", '\\b').
|
238
|
+
gsub("\n", '\\n').
|
239
|
+
gsub("\r", '\\r').
|
240
|
+
gsub("\f", '\\f').
|
241
|
+
gsub('"', '\\"').
|
242
|
+
freeze
|
243
|
+
end
|
244
|
+
|
245
|
+
# variable: denotations, relations
|
206
246
|
ERB_ANNOTATIONS_TTL = <<~HEREDOC
|
207
247
|
<% denotations.each do |d| -%>
|
208
248
|
<%= d[:obj_uri] %> tao:denoted_by <%= d[:span_uri] %> ;
|
@@ -214,6 +254,7 @@ class TAO::RDFizer
|
|
214
254
|
<% end -%>
|
215
255
|
HEREDOC
|
216
256
|
|
257
|
+
# variable: spans
|
217
258
|
ERB_SPANS_TTL = <<~HEREDOC
|
218
259
|
<% spans.each do |s| -%>
|
219
260
|
<%= s[:span_uri] %> rdf:type tao:Text_span ;
|
@@ -223,13 +264,14 @@ class TAO::RDFizer
|
|
223
264
|
<% s[:children].each do |s| -%>
|
224
265
|
tao:contains <%= s[:span_uri] %> ;
|
225
266
|
<% end -%>
|
226
|
-
tao:has_text <%= s[:text]
|
267
|
+
tao:has_text "<%= rdf_literal_escape(s[:text]) %>" ;
|
227
268
|
tao:belongs_to <<%= s[:source_uri] %>> ;
|
228
269
|
tao:begins_at <%= s[:begin] %> ;
|
229
270
|
tao:ends_at <%= s[:end] %> .
|
230
271
|
<% end -%>
|
231
272
|
HEREDOC
|
232
273
|
|
274
|
+
# variable: namespaces
|
233
275
|
ERB_PREFIXES_TTL = <<~HEREDOC
|
234
276
|
@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
|
235
277
|
@prefix tao: <http://pubannotation.org/ontology/tao.owl#> .
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: tao_rdfizer
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version:
|
4
|
+
version: 0.11.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Jin-Dong Kim
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2021-05-23 00:00:00.000000000 Z
|
12
12
|
dependencies: []
|
13
13
|
description: It uses TAO (text annotation ontology) for representation of annotations
|
14
14
|
to text.
|
@@ -40,7 +40,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
40
40
|
- !ruby/object:Gem::Version
|
41
41
|
version: '0'
|
42
42
|
requirements: []
|
43
|
-
rubygems_version: 3.0.
|
43
|
+
rubygems_version: 3.0.9
|
44
44
|
signing_key:
|
45
45
|
specification_version: 4
|
46
46
|
summary: A RDF statement generator for annotations in the PubAnnotation JSON format.
|