tao_rdfizer 0.9.12 → 0.11.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/bin/tao_rdfizer +10 -1
- data/lib/tao_rdfizer/tao_rdfizer.rb +78 -25
- metadata +3 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: ce27142842502109c882dc01222a5aa8e402f21c9305bcd5797a712dde5a0f8b
|
4
|
+
data.tar.gz: 8b542b1d3d4992e1129fa95cde633967bb55e805d7ed69fc36cb95940215004f
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 65aecd55bf6fbd609565800231c8f87cad533bc25422912927ef9293e4e7e03bcab6bb5fd639320bf184cd7f0b0aa5de9c5a880734e768f6830761b5eaa6afec
|
7
|
+
data.tar.gz: ef195ea7ddae0dfa1409659066e5700369659d08fd40390c85f3384c3476ca44defd9c8eee602d6ebb5ef0dc75d63894a76fc5f597d79e57f1bf28598ecd20f5
|
data/bin/tao_rdfizer
CHANGED
@@ -3,6 +3,7 @@ require 'tao_rdfizer'
|
|
3
3
|
require 'json'
|
4
4
|
|
5
5
|
mode = nil
|
6
|
+
options = {}
|
6
7
|
|
7
8
|
## command line option processing
|
8
9
|
require 'optparse'
|
@@ -17,6 +18,14 @@ optparse = OptionParser.new do |opts|
|
|
17
18
|
mode = :spans
|
18
19
|
end
|
19
20
|
|
21
|
+
opts.on('-x', '--x-prefixes', 'without prefixes.') do
|
22
|
+
options[:with_prefixes] = false
|
23
|
+
end
|
24
|
+
|
25
|
+
opts.on('-o', '--only-prefixes', 'only prefixes.') do
|
26
|
+
options[:only_prefixes] = true
|
27
|
+
end
|
28
|
+
|
20
29
|
opts.on('-h', '--help', 'displays this screen.') do
|
21
30
|
puts opts
|
22
31
|
exit
|
@@ -34,7 +43,7 @@ begin
|
|
34
43
|
annotations = JSON.parse File.read(ARGV[0]), :symbolize_names => true
|
35
44
|
annotations = [annotations] unless annotations.class == Array
|
36
45
|
rdfizer = TAO::RDFizer.new(mode)
|
37
|
-
puts rdfizer.rdfize(annotations)
|
46
|
+
puts rdfizer.rdfize(annotations, options)
|
38
47
|
rescue ArgumentError, IOError => e
|
39
48
|
puts e.message
|
40
49
|
end
|
data/lib/tao_rdfizer/tao_rdfizer.rb
CHANGED
@@ -14,19 +14,47 @@ class TAO::RDFizer
|
|
14
14
|
else
|
15
15
|
ERB_ANNOTATIONS_TTL
|
16
16
|
end
|
17
|
-
|
18
17
|
@tao_ttl_erb = ERB.new(template, nil, '-')
|
19
18
|
@prefix_ttl_erb = ERB.new(ERB_PREFIXES_TTL, nil, '-')
|
20
19
|
end
|
21
20
|
|
22
|
-
def rdfize(annotations_col)
|
21
|
+
def rdfize(annotations_col, options = nil)
|
22
|
+
options ||= {}
|
23
|
+
only_prefixes = options.has_key?(:only_prefixes) ? options[:only_prefixes] == true : false
|
24
|
+
with_prefixes = options.has_key?(:with_prefixes) ? options[:with_prefixes] == true : true
|
25
|
+
|
26
|
+
# check the format
|
27
|
+
annotations_col.each do |annotations|
|
28
|
+
raise "'target' is missing" unless annotations.has_key? :target
|
29
|
+
end
|
30
|
+
|
23
31
|
# namespaces
|
24
32
|
namespaces = {}
|
25
33
|
|
26
34
|
anns = annotations_col.first
|
27
35
|
anns[:namespaces].each {|n| namespaces[n[:prefix]] = n[:uri]} unless anns[:namespaces].nil?
|
28
36
|
|
37
|
+
prefixes_ttl = @prefix_ttl_erb.result_with_hash(namespaces:namespaces) if only_prefixes || with_prefixes
|
38
|
+
|
39
|
+
if only_prefixes
|
40
|
+
prefixes_ttl
|
41
|
+
else
|
42
|
+
annotations_ttl = get_annotations_ttl(annotations_col, namespaces)
|
43
|
+
if with_prefixes
|
44
|
+
prefixes_ttl + annotations_ttl
|
45
|
+
else
|
46
|
+
annotations_ttl
|
47
|
+
end
|
48
|
+
end
|
49
|
+
end
|
50
|
+
|
51
|
+
private
|
52
|
+
|
53
|
+
def get_annotations_ttl(annotations_col, namespaces)
|
54
|
+
anns = annotations_col.first
|
55
|
+
|
29
56
|
unless @mode ==:spans
|
57
|
+
raise ArgumentError, "A project name has to be specified." unless anns.has_key?(:project)
|
30
58
|
prefix_for_this = anns[:project].downcase.gsub(/ /, '_')
|
31
59
|
raise ArgumentError, "'#{prefix_for_this}' is a reserved prefix for this project." if namespaces.has_key?(prefix_for_this)
|
32
60
|
project_uri = 'http://pubannotation.org/projects/' + anns[:project]
|
@@ -34,6 +62,7 @@ class TAO::RDFizer
|
|
34
62
|
end
|
35
63
|
|
36
64
|
denotations = []
|
65
|
+
attributes = []
|
37
66
|
relations = []
|
38
67
|
spans = []
|
39
68
|
|
@@ -46,34 +75,57 @@ class TAO::RDFizer
|
|
46
75
|
end
|
47
76
|
|
48
77
|
# denotations and relations
|
49
|
-
_denotations = annotations[:denotations]
|
50
|
-
|
51
|
-
|
52
|
-
_relations = [] if _relations.nil?
|
78
|
+
_denotations = annotations[:denotations] || []
|
79
|
+
_attributes = annotations[:attributes] || []
|
80
|
+
_relations = annotations[:relations] || []
|
53
81
|
if @mode == :spans && annotations.has_key?(:tracks)
|
54
82
|
annotations[:tracks].each do |track|
|
55
83
|
_denotations += track[:denotations]
|
84
|
+
_attributes += track[:attributes]
|
56
85
|
_relations += track[:relations]
|
57
86
|
end
|
58
87
|
end
|
59
88
|
|
60
89
|
begin
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
90
|
+
unless @mode == :span
|
91
|
+
# index attributes
|
92
|
+
attributesh = _attributes.inject({}) do |h, a|
|
93
|
+
if a[:pred].end_with?('_id')
|
94
|
+
subj = a[:subj]
|
95
|
+
h[subj] = [] unless h.has_key? subj
|
96
|
+
h[subj] << a[:obj]
|
97
|
+
end
|
98
|
+
h
|
99
|
+
end
|
68
100
|
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
101
|
+
# denotations preprocessing
|
102
|
+
_denotations.each do |d|
|
103
|
+
span_uri = "<#{text_uri}/spans/#{d[:span][:begin]}-#{d[:span][:end]}>"
|
104
|
+
d[:span_uri] = span_uri
|
105
|
+
d[:obj_uri] = "#{prefix_for_this}:#{text_id}-#{d[:id]}"
|
106
|
+
class_uris = (attributesh[d[:id]] || []).push(d[:obj])
|
107
|
+
d[:class_uris] = class_uris.map{|uri| find_uri(uri, namespaces, prefix_for_this)}
|
108
|
+
rescue ArgumentError => e
|
109
|
+
raise ArgumentError, "[#{sourcedb}-#{sourceid}-#{d[:id]}] " + e.message
|
110
|
+
end
|
111
|
+
|
112
|
+
# relations preprocessing
|
113
|
+
_relations.each do |r|
|
114
|
+
r[:subj_uri] = "#{prefix_for_this}:#{text_id}-#{r[:subj]}"
|
115
|
+
r[:obj_uri] = "#{prefix_for_this}:#{text_id}-#{r[:obj]}"
|
116
|
+
r[:pred_uri] = find_uri(r[:pred], namespaces, prefix_for_this)
|
117
|
+
rescue ArgumentError => e
|
118
|
+
raise ArgumentError, "[#{sourcedb}-#{sourceid}-#{r[:id]}] " + e.message
|
119
|
+
end
|
120
|
+
else
|
121
|
+
# denotations preprocessing
|
122
|
+
_denotations.each do |d|
|
123
|
+
span_uri = "<#{text_uri}/spans/#{d[:span][:begin]}-#{d[:span][:end]}>"
|
124
|
+
d[:span_uri] = span_uri
|
125
|
+
rescue ArgumentError => e
|
126
|
+
raise ArgumentError, "[#{sourcedb}-#{sourceid}-#{d[:id]}] " + e.message
|
127
|
+
end
|
74
128
|
end
|
75
|
-
rescue ArgumentError => e
|
76
|
-
raise ArgumentError, "[#{sourcedb}-#{sourceid}] " + e
|
77
129
|
end
|
78
130
|
|
79
131
|
unless @mode == :annotations
|
@@ -88,7 +140,7 @@ class TAO::RDFizer
|
|
88
140
|
s[:text] = text[s[:begin] ... s[:end]]
|
89
141
|
end
|
90
142
|
|
91
|
-
# index
|
143
|
+
# index spans
|
92
144
|
spanh = _spans.inject({}){|r, s| r[s[:span_uri]] = s; r}
|
93
145
|
|
94
146
|
# add denotation information
|
@@ -142,11 +194,9 @@ class TAO::RDFizer
|
|
142
194
|
spans += _spans unless @mode == :annotations
|
143
195
|
end
|
144
196
|
|
145
|
-
|
197
|
+
@tao_ttl_erb.result(binding)
|
146
198
|
end
|
147
199
|
|
148
|
-
private
|
149
|
-
|
150
200
|
def include_parent?(spans, span)
|
151
201
|
# spans.each{|s| return true if (s[:begin] <= span[:begin] && s[:end] > span[:end]) || (s[:begin] < span[:begin] && s[:end] >= span[:end])}
|
152
202
|
spans.each{|s| return true if s[:begin] <= span[:begin] && s[:end] >= span[:end]}
|
@@ -178,10 +228,11 @@ class TAO::RDFizer
|
|
178
228
|
end
|
179
229
|
end
|
180
230
|
|
231
|
+
# variable: denotations, relations
|
181
232
|
ERB_ANNOTATIONS_TTL = <<~HEREDOC
|
182
233
|
<% denotations.each do |d| -%>
|
183
234
|
<%= d[:obj_uri] %> tao:denoted_by <%= d[:span_uri] %> ;
|
184
|
-
|
235
|
+
<%= d[:class_uris].map{|c| "\trdf:type " + c}.join(" ;\n") + " ." %>
|
185
236
|
<% end -%>
|
186
237
|
<%# relations -%>
|
187
238
|
<% relations.each do |r| -%>
|
@@ -189,6 +240,7 @@ class TAO::RDFizer
|
|
189
240
|
<% end -%>
|
190
241
|
HEREDOC
|
191
242
|
|
243
|
+
# variable: spans
|
192
244
|
ERB_SPANS_TTL = <<~HEREDOC
|
193
245
|
<% spans.each do |s| -%>
|
194
246
|
<%= s[:span_uri] %> rdf:type tao:Text_span ;
|
@@ -205,6 +257,7 @@ class TAO::RDFizer
|
|
205
257
|
<% end -%>
|
206
258
|
HEREDOC
|
207
259
|
|
260
|
+
# variable: namespaces
|
208
261
|
ERB_PREFIXES_TTL = <<~HEREDOC
|
209
262
|
@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
|
210
263
|
@prefix tao: <http://pubannotation.org/ontology/tao.owl#> .
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: tao_rdfizer
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.9.12
|
4
|
+
version: 0.11.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Jin-Dong Kim
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2021-05-20 00:00:00.000000000 Z
|
12
12
|
dependencies: []
|
13
13
|
description: It uses TAO (text annotation ontology) for representation of annotations
|
14
14
|
to text.
|
@@ -40,8 +40,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
40
40
|
- !ruby/object:Gem::Version
|
41
41
|
version: '0'
|
42
42
|
requirements: []
|
43
|
-
|
44
|
-
rubygems_version: 2.7.9
|
43
|
+
rubygems_version: 3.0.9
|
45
44
|
signing_key:
|
46
45
|
specification_version: 4
|
47
46
|
summary: A RDF statement generator for annotations in the PubAnnotation JSON format.
|