tao_rdfizer 0.9.9 → 0.10
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +5 -5
- data/lib/tao_rdfizer/tao_rdfizer.rb +58 -28
- metadata +3 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
|
-
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: a2f897b03c9e6856aaf752fd645808c47e6d52422ea1b6934b44b694d8d0d9a1
|
4
|
+
data.tar.gz: 8d9fb426258bf3fceabb6471a81fcaeb99cd18dcace251575a0f2f715fa4d787
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: f0d230f25945bf45aed05f8f0cb3d46113d5eb4213b17ce4812cbfbcfa5636d540343b8ee8fe94cf1bbbc1a1eb4db35ac076bf1d7cd3cb7abdd92db446c94f41
|
7
|
+
data.tar.gz: 6f00c1aeb735b8e76da2e9185ad141c0c29c1d45d94faad1b3d89ec903a1c931e22957f4f356dce24ff6beadd53a77ac7fcd5aad2c17cb281a5d0128cd062b89
|
@@ -4,11 +4,11 @@ require 'erb'
|
|
4
4
|
module TAO; end unless defined? TAO
|
5
5
|
|
6
6
|
class TAO::RDFizer
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
7
|
+
# if mode == :spans then produces span descriptions
|
8
|
+
# if mode == :annotations then produces annotation descriptions
|
9
|
+
# if mode == nil then produces both
|
10
|
+
def initialize(mode = nil)
|
11
|
+
@mode = mode
|
12
12
|
template = if !mode.nil? && mode == :spans
|
13
13
|
ERB_SPANS_TTL
|
14
14
|
else
|
@@ -17,7 +17,7 @@ class TAO::RDFizer
|
|
17
17
|
|
18
18
|
@tao_ttl_erb = ERB.new(template, nil, '-')
|
19
19
|
@prefix_ttl_erb = ERB.new(ERB_PREFIXES_TTL, nil, '-')
|
20
|
-
|
20
|
+
end
|
21
21
|
|
22
22
|
def rdfize(annotations_col)
|
23
23
|
# namespaces
|
@@ -27,6 +27,7 @@ class TAO::RDFizer
|
|
27
27
|
anns[:namespaces].each {|n| namespaces[n[:prefix]] = n[:uri]} unless anns[:namespaces].nil?
|
28
28
|
|
29
29
|
unless @mode ==:spans
|
30
|
+
raise ArgumentError, "A project name has to be specified." unless anns.has_key?(:project)
|
30
31
|
prefix_for_this = anns[:project].downcase.gsub(/ /, '_')
|
31
32
|
raise ArgumentError, "'#{prefix_for_this}' is a reserved prefix for this project." if namespaces.has_key?(prefix_for_this)
|
32
33
|
project_uri = 'http://pubannotation.org/projects/' + anns[:project]
|
@@ -34,6 +35,7 @@ class TAO::RDFizer
|
|
34
35
|
end
|
35
36
|
|
36
37
|
denotations = []
|
38
|
+
attributes = []
|
37
39
|
relations = []
|
38
40
|
spans = []
|
39
41
|
|
@@ -46,30 +48,57 @@ class TAO::RDFizer
|
|
46
48
|
end
|
47
49
|
|
48
50
|
# denotations and relations
|
49
|
-
_denotations = annotations[:denotations]
|
50
|
-
|
51
|
-
|
52
|
-
_relations = [] if _relations.nil?
|
51
|
+
_denotations = annotations[:denotations] || []
|
52
|
+
_attributes = annotations[:attributes] || []
|
53
|
+
_relations = annotations[:relations] || []
|
53
54
|
if @mode == :spans && annotations.has_key?(:tracks)
|
54
55
|
annotations[:tracks].each do |track|
|
55
56
|
_denotations += track[:denotations]
|
57
|
+
_attributes += track[:attributes]
|
56
58
|
_relations += track[:relations]
|
57
59
|
end
|
58
60
|
end
|
59
61
|
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
62
|
+
begin
|
63
|
+
if @mode == :annotations
|
64
|
+
# index attributes
|
65
|
+
attributesh = _attributes.inject({}) do |h, a|
|
66
|
+
if a[:pred].end_with?('_id')
|
67
|
+
subj = a[:subj]
|
68
|
+
h[subj] = [] unless h.has_key? subj
|
69
|
+
h[subj] << a[:obj]
|
70
|
+
end
|
71
|
+
h
|
72
|
+
end
|
67
73
|
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
74
|
+
# denotations preprocessing
|
75
|
+
_denotations.each do |d|
|
76
|
+
span_uri = "<#{text_uri}/spans/#{d[:span][:begin]}-#{d[:span][:end]}>"
|
77
|
+
d[:span_uri] = span_uri
|
78
|
+
d[:obj_uri] = "#{prefix_for_this}:#{text_id}-#{d[:id]}"
|
79
|
+
class_uris = attributesh[d[:id]].push(d[:obj])
|
80
|
+
d[:class_uris] = class_uris.map{|uri| find_uri(uri, namespaces, prefix_for_this)}
|
81
|
+
rescue ArgumentError => e
|
82
|
+
raise ArgumentError, "[#{sourcedb}-#{sourceid}-#{d[:id]}] " + e.message
|
83
|
+
end
|
84
|
+
|
85
|
+
# relations preprocessing
|
86
|
+
_relations.each do |r|
|
87
|
+
r[:subj_uri] = "#{prefix_for_this}:#{text_id}-#{r[:subj]}"
|
88
|
+
r[:obj_uri] = "#{prefix_for_this}:#{text_id}-#{r[:obj]}"
|
89
|
+
r[:pred_uri] = find_uri(r[:pred], namespaces, prefix_for_this)
|
90
|
+
rescue ArgumentError => e
|
91
|
+
raise ArgumentError, "[#{sourcedb}-#{sourceid}-#{r[:id]}] " + e.message
|
92
|
+
end
|
93
|
+
else
|
94
|
+
# denotations preprocessing
|
95
|
+
_denotations.each do |d|
|
96
|
+
span_uri = "<#{text_uri}/spans/#{d[:span][:begin]}-#{d[:span][:end]}>"
|
97
|
+
d[:span_uri] = span_uri
|
98
|
+
rescue ArgumentError => e
|
99
|
+
raise ArgumentError, "[#{sourcedb}-#{sourceid}-#{d[:id]}] " + e.message
|
100
|
+
end
|
101
|
+
end
|
73
102
|
end
|
74
103
|
|
75
104
|
unless @mode == :annotations
|
@@ -84,7 +113,7 @@ class TAO::RDFizer
|
|
84
113
|
s[:text] = text[s[:begin] ... s[:end]]
|
85
114
|
end
|
86
115
|
|
87
|
-
# index
|
116
|
+
# index spans
|
88
117
|
spanh = _spans.inject({}){|r, s| r[s[:span_uri]] = s; r}
|
89
118
|
|
90
119
|
# add denotation information
|
@@ -158,25 +187,26 @@ class TAO::RDFizer
|
|
158
187
|
end
|
159
188
|
|
160
189
|
def find_uri (label, namespaces, prefix_for_this)
|
190
|
+
raise ArgumentError, "A label including a whitespace character found: #{label}." if label.match(/\s/)
|
161
191
|
delimiter_position = label.index(':')
|
162
192
|
if !delimiter_position.nil? && namespaces.keys.include?(label[0...delimiter_position])
|
163
|
-
label
|
193
|
+
label.gsub('(', '\(').gsub(')', '\)') # brackets have to be escaped
|
164
194
|
elsif label =~ %r[^https?://]
|
165
195
|
"<#{label}>"
|
166
196
|
else
|
167
197
|
clabel = if label.match(/^\W+$/)
|
168
198
|
'SYM'
|
169
199
|
else
|
170
|
-
label.sub(/^\W+/, '').sub(
|
200
|
+
label.sub(/^\W+/, '').sub(/[^a-zA-Z0-9_)]+$/, '').gsub(/ +/, '_')
|
171
201
|
end
|
172
|
-
namespaces.has_key?('_base') ? "<#{clabel}>" : "#{prefix_for_this}:#{clabel}"
|
202
|
+
namespaces.has_key?('_base') ? "<#{clabel}>" : "#{prefix_for_this}:#{clabel.gsub('(', '\(').gsub(')', '\)')}"
|
173
203
|
end
|
174
204
|
end
|
175
205
|
|
176
206
|
ERB_ANNOTATIONS_TTL = <<~HEREDOC
|
177
207
|
<% denotations.each do |d| -%>
|
178
208
|
<%= d[:obj_uri] %> tao:denoted_by <%= d[:span_uri] %> ;
|
179
|
-
|
209
|
+
<%= d[:class_uris].map{|c| "\trdf:type " + c}.join(" ;\n") + " ." %>
|
180
210
|
<% end -%>
|
181
211
|
<%# relations -%>
|
182
212
|
<% relations.each do |r| -%>
|
@@ -193,7 +223,7 @@ class TAO::RDFizer
|
|
193
223
|
<% s[:children].each do |s| -%>
|
194
224
|
tao:contains <%= s[:span_uri] %> ;
|
195
225
|
<% end -%>
|
196
|
-
tao:has_text <%= s[:text].
|
226
|
+
tao:has_text <%= s[:text].inspect %> ;
|
197
227
|
tao:belongs_to <<%= s[:source_uri] %>> ;
|
198
228
|
tao:begins_at <%= s[:begin] %> ;
|
199
229
|
tao:ends_at <%= s[:end] %> .
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: tao_rdfizer
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.9.9
|
4
|
+
version: '0.10'
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Jin-Dong Kim
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2020-12-20 00:00:00.000000000 Z
|
12
12
|
dependencies: []
|
13
13
|
description: It uses TAO (text annotation ontology) for representation of annotations
|
14
14
|
to text.
|
@@ -40,8 +40,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
40
40
|
- !ruby/object:Gem::Version
|
41
41
|
version: '0'
|
42
42
|
requirements: []
|
43
|
-
|
44
|
-
rubygems_version: 2.6.11
|
43
|
+
rubygems_version: 3.0.8
|
45
44
|
signing_key:
|
46
45
|
specification_version: 4
|
47
46
|
summary: A RDF statement generator for annotations in the PubAnnotation JSON format.
|