tao_rdfizer 0.9.9 → 0.10
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +5 -5
- data/lib/tao_rdfizer/tao_rdfizer.rb +58 -28
- metadata +3 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
|
-
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: a2f897b03c9e6856aaf752fd645808c47e6d52422ea1b6934b44b694d8d0d9a1
|
4
|
+
data.tar.gz: 8d9fb426258bf3fceabb6471a81fcaeb99cd18dcace251575a0f2f715fa4d787
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: f0d230f25945bf45aed05f8f0cb3d46113d5eb4213b17ce4812cbfbcfa5636d540343b8ee8fe94cf1bbbc1a1eb4db35ac076bf1d7cd3cb7abdd92db446c94f41
|
7
|
+
data.tar.gz: 6f00c1aeb735b8e76da2e9185ad141c0c29c1d45d94faad1b3d89ec903a1c931e22957f4f356dce24ff6beadd53a77ac7fcd5aad2c17cb281a5d0128cd062b89
|
@@ -4,11 +4,11 @@ require 'erb'
|
|
4
4
|
module TAO; end unless defined? TAO
|
5
5
|
|
6
6
|
class TAO::RDFizer
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
7
|
+
# if mode == :spans then produces span descriptions
|
8
|
+
# if mode == :annotations then produces annotation descriptions
|
9
|
+
# if mode == nil then produces both
|
10
|
+
def initialize(mode = nil)
|
11
|
+
@mode = mode
|
12
12
|
template = if !mode.nil? && mode == :spans
|
13
13
|
ERB_SPANS_TTL
|
14
14
|
else
|
@@ -17,7 +17,7 @@ class TAO::RDFizer
|
|
17
17
|
|
18
18
|
@tao_ttl_erb = ERB.new(template, nil, '-')
|
19
19
|
@prefix_ttl_erb = ERB.new(ERB_PREFIXES_TTL, nil, '-')
|
20
|
-
|
20
|
+
end
|
21
21
|
|
22
22
|
def rdfize(annotations_col)
|
23
23
|
# namespaces
|
@@ -27,6 +27,7 @@ class TAO::RDFizer
|
|
27
27
|
anns[:namespaces].each {|n| namespaces[n[:prefix]] = n[:uri]} unless anns[:namespaces].nil?
|
28
28
|
|
29
29
|
unless @mode ==:spans
|
30
|
+
raise ArgumentError, "A project name has to be specified." unless anns.has_key?(:project)
|
30
31
|
prefix_for_this = anns[:project].downcase.gsub(/ /, '_')
|
31
32
|
raise ArgumentError, "'#{prefix_for_this}' is a reserved prefix for this project." if namespaces.has_key?(prefix_for_this)
|
32
33
|
project_uri = 'http://pubannotation.org/projects/' + anns[:project]
|
@@ -34,6 +35,7 @@ class TAO::RDFizer
|
|
34
35
|
end
|
35
36
|
|
36
37
|
denotations = []
|
38
|
+
attributes = []
|
37
39
|
relations = []
|
38
40
|
spans = []
|
39
41
|
|
@@ -46,30 +48,57 @@ class TAO::RDFizer
|
|
46
48
|
end
|
47
49
|
|
48
50
|
# denotations and relations
|
49
|
-
_denotations = annotations[:denotations]
|
50
|
-
|
51
|
-
|
52
|
-
_relations = [] if _relations.nil?
|
51
|
+
_denotations = annotations[:denotations] || []
|
52
|
+
_attributes = annotations[:attributes] || []
|
53
|
+
_relations = annotations[:relations] || []
|
53
54
|
if @mode == :spans && annotations.has_key?(:tracks)
|
54
55
|
annotations[:tracks].each do |track|
|
55
56
|
_denotations += track[:denotations]
|
57
|
+
_attributes += track[:attributes]
|
56
58
|
_relations += track[:relations]
|
57
59
|
end
|
58
60
|
end
|
59
61
|
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
62
|
+
begin
|
63
|
+
if @mode == :annotations
|
64
|
+
# index attributes
|
65
|
+
attributesh = _attributes.inject({}) do |h, a|
|
66
|
+
if a[:pred].end_with?('_id')
|
67
|
+
subj = a[:subj]
|
68
|
+
h[subj] = [] unless h.has_key? subj
|
69
|
+
h[subj] << a[:obj]
|
70
|
+
end
|
71
|
+
h
|
72
|
+
end
|
67
73
|
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
74
|
+
# denotations preprocessing
|
75
|
+
_denotations.each do |d|
|
76
|
+
span_uri = "<#{text_uri}/spans/#{d[:span][:begin]}-#{d[:span][:end]}>"
|
77
|
+
d[:span_uri] = span_uri
|
78
|
+
d[:obj_uri] = "#{prefix_for_this}:#{text_id}-#{d[:id]}"
|
79
|
+
class_uris = attributesh[d[:id]].push(d[:obj])
|
80
|
+
d[:class_uris] = class_uris.map{|uri| find_uri(uri, namespaces, prefix_for_this)}
|
81
|
+
rescue ArgumentError => e
|
82
|
+
raise ArgumentError, "[#{sourcedb}-#{sourceid}-#{d[:id]}] " + e.message
|
83
|
+
end
|
84
|
+
|
85
|
+
# relations preprocessing
|
86
|
+
_relations.each do |r|
|
87
|
+
r[:subj_uri] = "#{prefix_for_this}:#{text_id}-#{r[:subj]}"
|
88
|
+
r[:obj_uri] = "#{prefix_for_this}:#{text_id}-#{r[:obj]}"
|
89
|
+
r[:pred_uri] = find_uri(r[:pred], namespaces, prefix_for_this)
|
90
|
+
rescue ArgumentError => e
|
91
|
+
raise ArgumentError, "[#{sourcedb}-#{sourceid}-#{r[:id]}] " + e.message
|
92
|
+
end
|
93
|
+
else
|
94
|
+
# denotations preprocessing
|
95
|
+
_denotations.each do |d|
|
96
|
+
span_uri = "<#{text_uri}/spans/#{d[:span][:begin]}-#{d[:span][:end]}>"
|
97
|
+
d[:span_uri] = span_uri
|
98
|
+
rescue ArgumentError => e
|
99
|
+
raise ArgumentError, "[#{sourcedb}-#{sourceid}-#{d[:id]}] " + e.message
|
100
|
+
end
|
101
|
+
end
|
73
102
|
end
|
74
103
|
|
75
104
|
unless @mode == :annotations
|
@@ -84,7 +113,7 @@ class TAO::RDFizer
|
|
84
113
|
s[:text] = text[s[:begin] ... s[:end]]
|
85
114
|
end
|
86
115
|
|
87
|
-
# index
|
116
|
+
# index spans
|
88
117
|
spanh = _spans.inject({}){|r, s| r[s[:span_uri]] = s; r}
|
89
118
|
|
90
119
|
# add denotation information
|
@@ -158,25 +187,26 @@ class TAO::RDFizer
|
|
158
187
|
end
|
159
188
|
|
160
189
|
def find_uri (label, namespaces, prefix_for_this)
|
190
|
+
raise ArgumentError, "A label including a whitespace character found: #{label}." if label.match(/\s/)
|
161
191
|
delimiter_position = label.index(':')
|
162
192
|
if !delimiter_position.nil? && namespaces.keys.include?(label[0...delimiter_position])
|
163
|
-
label
|
193
|
+
label.gsub('(', '\(').gsub(')', '\)') # brackets have to be escaped
|
164
194
|
elsif label =~ %r[^https?://]
|
165
195
|
"<#{label}>"
|
166
196
|
else
|
167
197
|
clabel = if label.match(/^\W+$/)
|
168
198
|
'SYM'
|
169
199
|
else
|
170
|
-
label.sub(/^\W+/, '').sub(
|
200
|
+
label.sub(/^\W+/, '').sub(/[^a-zA-Z0-9_)]+$/, '').gsub(/ +/, '_')
|
171
201
|
end
|
172
|
-
namespaces.has_key?('_base') ? "<#{clabel}>" : "#{prefix_for_this}:#{clabel}"
|
202
|
+
namespaces.has_key?('_base') ? "<#{clabel}>" : "#{prefix_for_this}:#{clabel.gsub('(', '\(').gsub(')', '\)')}"
|
173
203
|
end
|
174
204
|
end
|
175
205
|
|
176
206
|
ERB_ANNOTATIONS_TTL = <<~HEREDOC
|
177
207
|
<% denotations.each do |d| -%>
|
178
208
|
<%= d[:obj_uri] %> tao:denoted_by <%= d[:span_uri] %> ;
|
179
|
-
|
209
|
+
<%= d[:class_uris].map{|c| "\trdf:type " + c}.join(" ;\n") + " ." %>
|
180
210
|
<% end -%>
|
181
211
|
<%# relations -%>
|
182
212
|
<% relations.each do |r| -%>
|
@@ -193,7 +223,7 @@ class TAO::RDFizer
|
|
193
223
|
<% s[:children].each do |s| -%>
|
194
224
|
tao:contains <%= s[:span_uri] %> ;
|
195
225
|
<% end -%>
|
196
|
-
tao:has_text <%= s[:text].
|
226
|
+
tao:has_text <%= s[:text].inspect %> ;
|
197
227
|
tao:belongs_to <<%= s[:source_uri] %>> ;
|
198
228
|
tao:begins_at <%= s[:begin] %> ;
|
199
229
|
tao:ends_at <%= s[:end] %> .
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: tao_rdfizer
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: '0.10'
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Jin-Dong Kim
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2020-12-20 00:00:00.000000000 Z
|
12
12
|
dependencies: []
|
13
13
|
description: It uses TAO (text annotation ontology) for representation of annotations
|
14
14
|
to text.
|
@@ -40,8 +40,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
40
40
|
- !ruby/object:Gem::Version
|
41
41
|
version: '0'
|
42
42
|
requirements: []
|
43
|
-
|
44
|
-
rubygems_version: 2.6.11
|
43
|
+
rubygems_version: 3.0.8
|
45
44
|
signing_key:
|
46
45
|
specification_version: 4
|
47
46
|
summary: A RDF statement generator for annotations in the PubAnnotation JSON format.
|