tao_rdfizer 0.9.13 → 0.11.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/bin/tao_rdfizer +10 -1
- data/lib/tao_rdfizer/tao_rdfizer.rb +59 -16
- metadata +3 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 95aaa9006671c09cfcf08d12876932c5783f3d784ad3855b4a3819ad79febf87
|
4
|
+
data.tar.gz: 6f3970db2a93208bd760c68651bc33a212d7e384396314deaebe1fdc744b20b0
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 6ffe933f1e7ecfc91b7e51d010fc84d42c5afabfc0cac59df717f264a45d337a4e67d5a2e56082cc00a67b9b20594156fd60fe51e700a4a78cef8c3d2c50fe20
|
7
|
+
data.tar.gz: efc71f7e1765dff987554031bbf4edcea8881cabf232ac5504b67fb33d4ac5b514b96f02410b2293b3a37b443508a4167786bd67cf696543802dcba150146299
|
data/bin/tao_rdfizer
CHANGED
@@ -3,6 +3,7 @@ require 'tao_rdfizer'
|
|
3
3
|
require 'json'
|
4
4
|
|
5
5
|
mode = nil
|
6
|
+
options = {}
|
6
7
|
|
7
8
|
## command line option processing
|
8
9
|
require 'optparse'
|
@@ -17,6 +18,14 @@ optparse = OptionParser.new do |opts|
|
|
17
18
|
mode = :spans
|
18
19
|
end
|
19
20
|
|
21
|
+
opts.on('-x', '--x-prefixes', 'without prefixes.') do
|
22
|
+
options[:with_prefixes] = false
|
23
|
+
end
|
24
|
+
|
25
|
+
opts.on('-o', '--only-prefixes', 'only prefixes.') do
|
26
|
+
options[:only_prefixes] = true
|
27
|
+
end
|
28
|
+
|
20
29
|
opts.on('-h', '--help', 'displays this screen.') do
|
21
30
|
puts opts
|
22
31
|
exit
|
@@ -34,7 +43,7 @@ begin
|
|
34
43
|
annotations = JSON.parse File.read(ARGV[0]), :symbolize_names => true
|
35
44
|
annotations = [annotations] unless annotations.class == Array
|
36
45
|
rdfizer = TAO::RDFizer.new(mode)
|
37
|
-
puts rdfizer.rdfize(annotations)
|
46
|
+
puts rdfizer.rdfize(annotations, options)
|
38
47
|
rescue ArgumentError, IOError => e
|
39
48
|
puts e.message
|
40
49
|
end
|
data/lib/tao_rdfizer/tao_rdfizer.rb
CHANGED
@@ -14,18 +14,45 @@ class TAO::RDFizer
|
|
14
14
|
else
|
15
15
|
ERB_ANNOTATIONS_TTL
|
16
16
|
end
|
17
|
-
|
18
17
|
@tao_ttl_erb = ERB.new(template, nil, '-')
|
19
18
|
@prefix_ttl_erb = ERB.new(ERB_PREFIXES_TTL, nil, '-')
|
20
19
|
end
|
21
20
|
|
22
|
-
def rdfize(annotations_col)
|
21
|
+
def rdfize(annotations_col, options = nil)
|
22
|
+
options ||= {}
|
23
|
+
only_prefixes = options.has_key?(:only_prefixes) ? options[:only_prefixes] == true : false
|
24
|
+
with_prefixes = options.has_key?(:with_prefixes) ? options[:with_prefixes] == true : true
|
25
|
+
|
26
|
+
# check the format
|
27
|
+
annotations_col.each do |annotations|
|
28
|
+
raise "'target' is missing" unless annotations.has_key? :target
|
29
|
+
end
|
30
|
+
|
23
31
|
# namespaces
|
24
32
|
namespaces = {}
|
25
33
|
|
26
34
|
anns = annotations_col.first
|
27
35
|
anns[:namespaces].each {|n| namespaces[n[:prefix]] = n[:uri]} unless anns[:namespaces].nil?
|
28
36
|
|
37
|
+
prefixes_ttl = @prefix_ttl_erb.result_with_hash(namespaces:namespaces) if only_prefixes || with_prefixes
|
38
|
+
|
39
|
+
if only_prefixes
|
40
|
+
prefixes_ttl
|
41
|
+
else
|
42
|
+
annotations_ttl = get_annotations_ttl(annotations_col, namespaces)
|
43
|
+
if with_prefixes
|
44
|
+
prefixes_ttl + annotations_ttl
|
45
|
+
else
|
46
|
+
annotations_ttl
|
47
|
+
end
|
48
|
+
end
|
49
|
+
end
|
50
|
+
|
51
|
+
private
|
52
|
+
|
53
|
+
def get_annotations_ttl(annotations_col, namespaces)
|
54
|
+
anns = annotations_col.first
|
55
|
+
|
29
56
|
unless @mode ==:spans
|
30
57
|
raise ArgumentError, "A project name has to be specified." unless anns.has_key?(:project)
|
31
58
|
prefix_for_this = anns[:project].downcase.gsub(/ /, '_')
|
@@ -35,6 +62,7 @@ class TAO::RDFizer
|
|
35
62
|
end
|
36
63
|
|
37
64
|
denotations = []
|
65
|
+
attributes = []
|
38
66
|
relations = []
|
39
67
|
spans = []
|
40
68
|
|
@@ -47,25 +75,36 @@ class TAO::RDFizer
|
|
47
75
|
end
|
48
76
|
|
49
77
|
# denotations and relations
|
50
|
-
_denotations = annotations[:denotations]
|
51
|
-
|
52
|
-
|
53
|
-
_relations = [] if _relations.nil?
|
78
|
+
_denotations = annotations[:denotations] || []
|
79
|
+
_attributes = annotations[:attributes] || []
|
80
|
+
_relations = annotations[:relations] || []
|
54
81
|
if @mode == :spans && annotations.has_key?(:tracks)
|
55
82
|
annotations[:tracks].each do |track|
|
56
|
-
_denotations += track[:denotations]
|
57
|
-
|
83
|
+
_denotations += track[:denotations] if track.has_key? :denotations
|
84
|
+
_attributes += track[:attributes] if track.has_key? :attributes
|
85
|
+
_relations += track[:relations] if track.has_key? :relations
|
58
86
|
end
|
59
87
|
end
|
60
88
|
|
61
89
|
begin
|
62
|
-
|
90
|
+
unless @mode == :span
|
91
|
+
# index attributes
|
92
|
+
attributesh = _attributes.inject({}) do |h, a|
|
93
|
+
if a[:pred].end_with?('_id')
|
94
|
+
subj = a[:subj]
|
95
|
+
h[subj] = [] unless h.has_key? subj
|
96
|
+
h[subj] << a[:obj]
|
97
|
+
end
|
98
|
+
h
|
99
|
+
end
|
100
|
+
|
63
101
|
# denotations preprocessing
|
64
102
|
_denotations.each do |d|
|
65
103
|
span_uri = "<#{text_uri}/spans/#{d[:span][:begin]}-#{d[:span][:end]}>"
|
66
104
|
d[:span_uri] = span_uri
|
67
105
|
d[:obj_uri] = "#{prefix_for_this}:#{text_id}-#{d[:id]}"
|
68
|
-
d[:
|
106
|
+
class_uris = (attributesh[d[:id]] || []).push(d[:obj])
|
107
|
+
d[:class_uris] = class_uris.map{|uri| find_uri(uri, namespaces, prefix_for_this)}
|
69
108
|
rescue ArgumentError => e
|
70
109
|
raise ArgumentError, "[#{sourcedb}-#{sourceid}-#{d[:id]}] " + e.message
|
71
110
|
end
|
@@ -101,7 +140,7 @@ class TAO::RDFizer
|
|
101
140
|
s[:text] = text[s[:begin] ... s[:end]]
|
102
141
|
end
|
103
142
|
|
104
|
-
# index
|
143
|
+
# index spans
|
105
144
|
spanh = _spans.inject({}){|r, s| r[s[:span_uri]] = s; r}
|
106
145
|
|
107
146
|
# add denotation information
|
@@ -155,11 +194,9 @@ class TAO::RDFizer
|
|
155
194
|
spans += _spans unless @mode == :annotations
|
156
195
|
end
|
157
196
|
|
158
|
-
|
197
|
+
@tao_ttl_erb.result(binding)
|
159
198
|
end
|
160
199
|
|
161
|
-
private
|
162
|
-
|
163
200
|
def include_parent?(spans, span)
|
164
201
|
# spans.each{|s| return true if (s[:begin] <= span[:begin] && s[:end] > span[:end]) || (s[:begin] < span[:begin] && s[:end] >= span[:end])}
|
165
202
|
spans.each{|s| return true if s[:begin] <= span[:begin] && s[:end] >= span[:end]}
|
@@ -175,7 +212,10 @@ class TAO::RDFizer
|
|
175
212
|
end
|
176
213
|
|
177
214
|
def find_uri (label, namespaces, prefix_for_this)
|
178
|
-
|
215
|
+
if label.match(/\s/)
|
216
|
+
# raise ArgumentError, "A label including a whitespace character found: #{label}."
|
217
|
+
label.gsub(/\s/, '_')
|
218
|
+
end
|
179
219
|
delimiter_position = label.index(':')
|
180
220
|
if !delimiter_position.nil? && namespaces.keys.include?(label[0...delimiter_position])
|
181
221
|
label.gsub('(', '\(').gsub(')', '\)') # brackets have to be escaped
|
@@ -191,10 +231,11 @@ class TAO::RDFizer
|
|
191
231
|
end
|
192
232
|
end
|
193
233
|
|
234
|
+
# variable: denotations, relations
|
194
235
|
ERB_ANNOTATIONS_TTL = <<~HEREDOC
|
195
236
|
<% denotations.each do |d| -%>
|
196
237
|
<%= d[:obj_uri] %> tao:denoted_by <%= d[:span_uri] %> ;
|
197
|
-
|
238
|
+
<%= d[:class_uris].map{|c| "\trdf:type " + c}.join(" ;\n") + " ." %>
|
198
239
|
<% end -%>
|
199
240
|
<%# relations -%>
|
200
241
|
<% relations.each do |r| -%>
|
@@ -202,6 +243,7 @@ class TAO::RDFizer
|
|
202
243
|
<% end -%>
|
203
244
|
HEREDOC
|
204
245
|
|
246
|
+
# variable: spans
|
205
247
|
ERB_SPANS_TTL = <<~HEREDOC
|
206
248
|
<% spans.each do |s| -%>
|
207
249
|
<%= s[:span_uri] %> rdf:type tao:Text_span ;
|
@@ -218,6 +260,7 @@ class TAO::RDFizer
|
|
218
260
|
<% end -%>
|
219
261
|
HEREDOC
|
220
262
|
|
263
|
+
# variable: namespaces
|
221
264
|
ERB_PREFIXES_TTL = <<~HEREDOC
|
222
265
|
@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
|
223
266
|
@prefix tao: <http://pubannotation.org/ontology/tao.owl#> .
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: tao_rdfizer
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.9.13
|
4
|
+
version: 0.11.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Jin-Dong Kim
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2021-05-23 00:00:00.000000000 Z
|
12
12
|
dependencies: []
|
13
13
|
description: It uses TAO (text annotation ontology) for representation of annotations
|
14
14
|
to text.
|
@@ -40,8 +40,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
40
40
|
- !ruby/object:Gem::Version
|
41
41
|
version: '0'
|
42
42
|
requirements: []
|
43
|
-
|
44
|
-
rubygems_version: 2.7.9
|
43
|
+
rubygems_version: 3.0.9
|
45
44
|
signing_key:
|
46
45
|
specification_version: 4
|
47
46
|
summary: A RDF statement generator for annotations in the PubAnnotation JSON format.
|