tao_rdfizer 0.10 → 0.11.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/bin/tao_rdfizer +10 -1
- data/lib/tao_rdfizer/tao_rdfizer.rb +54 -12
- metadata +3 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: ef68cf0cfd37026eda3abfb19f294d866c343294c1a9cc8f4ed2aa9af1f1443c
|
4
|
+
data.tar.gz: fdde442909c60d0dbfe81ea445b049a52829752d01b06f5f599786071b09b62c
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: a0219d82d900259fd8e7dab5ea5382bc06bb504ea21207302b21b59047f33ba899491aa53043a9456af1c9e1693d9a957223a9cad31d8a2e21f4df60aebdbf4e
|
7
|
+
data.tar.gz: 5250d9a3daf7716e5c8f2e01fe2560589862fb3b7cdf7a03c067f07e47930f91b6912b7b9316d8de652fcf983f677e1ab0d9de16954f2fdf3880478e7f50344a
|
data/bin/tao_rdfizer
CHANGED
@@ -3,6 +3,7 @@ require 'tao_rdfizer'
|
|
3
3
|
require 'json'
|
4
4
|
|
5
5
|
mode = nil
|
6
|
+
options = {}
|
6
7
|
|
7
8
|
## command line option processing
|
8
9
|
require 'optparse'
|
@@ -17,6 +18,14 @@ optparse = OptionParser.new do |opts|
|
|
17
18
|
mode = :spans
|
18
19
|
end
|
19
20
|
|
21
|
+
opts.on('-x', '--x-prefixes', 'without prefixes.') do
|
22
|
+
options[:with_prefixes] = false
|
23
|
+
end
|
24
|
+
|
25
|
+
opts.on('-o', '--only-prefixes', 'only prefixes.') do
|
26
|
+
options[:only_prefixes] = true
|
27
|
+
end
|
28
|
+
|
20
29
|
opts.on('-h', '--help', 'displays this screen.') do
|
21
30
|
puts opts
|
22
31
|
exit
|
@@ -34,7 +43,7 @@ begin
|
|
34
43
|
annotations = JSON.parse File.read(ARGV[0]), :symbolize_names => true
|
35
44
|
annotations = [annotations] unless annotations.class == Array
|
36
45
|
rdfizer = TAO::RDFizer.new(mode)
|
37
|
-
puts rdfizer.rdfize(annotations)
|
46
|
+
puts rdfizer.rdfize(annotations, options)
|
38
47
|
rescue ArgumentError, IOError => e
|
39
48
|
puts e.message
|
40
49
|
end
|
@@ -14,18 +14,45 @@ class TAO::RDFizer
|
|
14
14
|
else
|
15
15
|
ERB_ANNOTATIONS_TTL
|
16
16
|
end
|
17
|
-
|
18
17
|
@tao_ttl_erb = ERB.new(template, nil, '-')
|
19
18
|
@prefix_ttl_erb = ERB.new(ERB_PREFIXES_TTL, nil, '-')
|
20
19
|
end
|
21
20
|
|
22
|
-
def rdfize(annotations_col)
|
21
|
+
# Renders a collection of annotation hashes as Turtle.
#
# annotations_col - Array of annotation Hashes (symbol keys). Every element
#                   must carry a :target key. Namespaces are read from the
#                   :namespaces entry of the first element only.
# options         - Hash or nil:
#                   :only_prefixes - when exactly true, return only the
#                                    prefix declarations (default: false).
#                   :with_prefixes - when given and not exactly true, leave
#                                    the prefix declarations out of the
#                                    result (default: true).
#
# Returns the generated Turtle as a String.
# Raises ArgumentError when an element lacks :target, so that the failure is
# reported the same way as the other validation errors of this class.
def rdfize(annotations_col, options = nil)
  options ||= {}
  only_prefixes = options[:only_prefixes] == true
  with_prefixes = options.fetch(:with_prefixes, true) == true

  # check the format
  annotations_col.each do |annotations|
    raise ArgumentError, "'target' is missing" unless annotations.has_key? :target
  end

  # namespaces
  # An empty collection simply yields no user-defined namespaces.
  first_annotations = annotations_col.first || {}
  namespaces = {}
  (first_annotations[:namespaces] || []).each { |n| namespaces[n[:prefix]] = n[:uri] }

  # The prefix block is rendered only when it will be part of the result.
  prefixes_ttl = @prefix_ttl_erb.result_with_hash(namespaces: namespaces) if only_prefixes || with_prefixes
  return prefixes_ttl if only_prefixes

  annotations_ttl = get_annotations_ttl(annotations_col, namespaces)
  with_prefixes ? prefixes_ttl + annotations_ttl : annotations_ttl
end
|
50
|
+
|
51
|
+
private
|
52
|
+
|
53
|
+
def get_annotations_ttl(annotations_col, namespaces)
|
54
|
+
anns = annotations_col.first
|
55
|
+
|
29
56
|
unless @mode ==:spans
|
30
57
|
raise ArgumentError, "A project name has to be specified." unless anns.has_key?(:project)
|
31
58
|
prefix_for_this = anns[:project].downcase.gsub(/ /, '_')
|
@@ -53,14 +80,14 @@ class TAO::RDFizer
|
|
53
80
|
_relations = annotations[:relations] || []
|
54
81
|
if @mode == :spans && annotations.has_key?(:tracks)
|
55
82
|
annotations[:tracks].each do |track|
|
56
|
-
_denotations += track[:denotations]
|
57
|
-
_attributes += track[:attributes]
|
58
|
-
_relations += track[:relations]
|
83
|
+
_denotations += track[:denotations] if track.has_key? :denotations
|
84
|
+
_attributes += track[:attributes] if track.has_key? :attributes
|
85
|
+
_relations += track[:relations] if track.has_key? :relations
|
59
86
|
end
|
60
87
|
end
|
61
88
|
|
62
89
|
begin
|
63
|
-
|
90
|
+
unless @mode == :span
|
64
91
|
# index attributes
|
65
92
|
attributesh = _attributes.inject({}) do |h, a|
|
66
93
|
if a[:pred].end_with?('_id')
|
@@ -76,7 +103,7 @@ class TAO::RDFizer
|
|
76
103
|
span_uri = "<#{text_uri}/spans/#{d[:span][:begin]}-#{d[:span][:end]}>"
|
77
104
|
d[:span_uri] = span_uri
|
78
105
|
d[:obj_uri] = "#{prefix_for_this}:#{text_id}-#{d[:id]}"
|
79
|
-
class_uris = attributesh[d[:id]].push(d[:obj])
|
106
|
+
class_uris = (attributesh[d[:id]] || []).push(d[:obj])
|
80
107
|
d[:class_uris] = class_uris.map{|uri| find_uri(uri, namespaces, prefix_for_this)}
|
81
108
|
rescue ArgumentError => e
|
82
109
|
raise ArgumentError, "[#{sourcedb}-#{sourceid}-#{d[:id]}] " + e.message
|
@@ -167,11 +194,9 @@ class TAO::RDFizer
|
|
167
194
|
spans += _spans unless @mode == :annotations
|
168
195
|
end
|
169
196
|
|
170
|
-
|
197
|
+
@tao_ttl_erb.result(binding)
|
171
198
|
end
|
172
199
|
|
173
|
-
private
|
174
|
-
|
175
200
|
def include_parent?(spans, span)
|
176
201
|
# spans.each{|s| return true if (s[:begin] <= span[:begin] && s[:end] > span[:end]) || (s[:begin] < span[:begin] && s[:end] >= span[:end])}
|
177
202
|
spans.each{|s| return true if s[:begin] <= span[:begin] && s[:end] >= span[:end]}
|
@@ -187,7 +212,10 @@ class TAO::RDFizer
|
|
187
212
|
end
|
188
213
|
|
189
214
|
def find_uri (label, namespaces, prefix_for_this)
|
190
|
-
|
215
|
+
if label.match(/\s/)
|
216
|
+
# raise ArgumentError, "A label including a whitespace character found: #{label}."
|
217
|
+
label.gsub(/\s/, '_')
|
218
|
+
end
|
191
219
|
delimiter_position = label.index(':')
|
192
220
|
if !delimiter_position.nil? && namespaces.keys.include?(label[0...delimiter_position])
|
193
221
|
label.gsub('(', '\(').gsub(')', '\)') # brackets have to be escaped
|
@@ -203,6 +231,18 @@ class TAO::RDFizer
|
|
203
231
|
end
|
204
232
|
end
|
205
233
|
|
234
|
+
# Escapes a string so that it can be placed between double quotes as a
# Turtle string literal.
#
# string - the raw text (String).
#
# Returns a new, frozen String in which backslash, tab, backspace, newline,
# carriage return, form feed and double quote are replaced by their
# backslash escape sequences.
def rdf_literal_escape(string)
  # A Hash replacement is inserted literally. A String replacement would be
  # scanned for backslash sequences, which turns gsub('\\', '\\\\') into a
  # no-op and leaves backslashes unescaped.
  escapes = {
    '\\' => '\\\\',
    "\t" => '\\t',
    "\b" => '\\b',
    "\n" => '\\n',
    "\r" => '\\r',
    "\f" => '\\f',
    '"'  => '\\"'
  }

  # Single pass, so backslashes introduced by one escape are never escaped
  # a second time. Inside a character class \b denotes backspace.
  string.gsub(/[\\\t\b\n\r\f"]/, escapes).freeze
end
|
244
|
+
|
245
|
+
# variable: denotations, relations
|
206
246
|
ERB_ANNOTATIONS_TTL = <<~HEREDOC
|
207
247
|
<% denotations.each do |d| -%>
|
208
248
|
<%= d[:obj_uri] %> tao:denoted_by <%= d[:span_uri] %> ;
|
@@ -214,6 +254,7 @@ class TAO::RDFizer
|
|
214
254
|
<% end -%>
|
215
255
|
HEREDOC
|
216
256
|
|
257
|
+
# variable: spans
|
217
258
|
ERB_SPANS_TTL = <<~HEREDOC
|
218
259
|
<% spans.each do |s| -%>
|
219
260
|
<%= s[:span_uri] %> rdf:type tao:Text_span ;
|
@@ -223,13 +264,14 @@ class TAO::RDFizer
|
|
223
264
|
<% s[:children].each do |s| -%>
|
224
265
|
tao:contains <%= s[:span_uri] %> ;
|
225
266
|
<% end -%>
|
226
|
-
tao:has_text <%= s[:text]
|
267
|
+
tao:has_text "<%= rdf_literal_escape(s[:text]) %>" ;
|
227
268
|
tao:belongs_to <<%= s[:source_uri] %>> ;
|
228
269
|
tao:begins_at <%= s[:begin] %> ;
|
229
270
|
tao:ends_at <%= s[:end] %> .
|
230
271
|
<% end -%>
|
231
272
|
HEREDOC
|
232
273
|
|
274
|
+
# variable: namespaces
|
233
275
|
ERB_PREFIXES_TTL = <<~HEREDOC
|
234
276
|
@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
|
235
277
|
@prefix tao: <http://pubannotation.org/ontology/tao.owl#> .
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: tao_rdfizer
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version:
|
4
|
+
version: 0.11.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Jin-Dong Kim
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2021-05-23 00:00:00.000000000 Z
|
12
12
|
dependencies: []
|
13
13
|
description: It uses TAO (text annotation ontology) for representation of annotations
|
14
14
|
to text.
|
@@ -40,7 +40,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
40
40
|
- !ruby/object:Gem::Version
|
41
41
|
version: '0'
|
42
42
|
requirements: []
|
43
|
-
rubygems_version: 3.0.
|
43
|
+
rubygems_version: 3.0.9
|
44
44
|
signing_key:
|
45
45
|
specification_version: 4
|
46
46
|
summary: A RDF statement generator for annotations in the PubAnnotation JSON format.
|