tao_rdfizer 0.9.10 → 0.10.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/tao_rdfizer/tao_rdfizer.rb +48 -23
- metadata +3 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 4baf53459b357e899ab4e569f15ba00e4240653641912488c2478ef0a65ac2bf
|
4
|
+
data.tar.gz: cd7d1a7121abbd4f725b1cbf5dff8cf28db27efdcab025e1c43305d0350157ab
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: ba755e5d41a06b36d592dc52261ed65b9018c47ed4da5b29fa0bf2a49196e0ae29c046c1cf463be9bbaad510ddca2bed464ce17898c58480e3258668374a7675
|
7
|
+
data.tar.gz: cb71ac538c0573812317caa83a54b3069dca0221d85388dd2ede2e1251b4d820379e43ac45744dbc450faf4a3dcd0db542832075a4febdc03389d956a7cbd9ff
|
@@ -27,6 +27,7 @@ class TAO::RDFizer
|
|
27
27
|
anns[:namespaces].each {|n| namespaces[n[:prefix]] = n[:uri]} unless anns[:namespaces].nil?
|
28
28
|
|
29
29
|
unless @mode ==:spans
|
30
|
+
raise ArgumentError, "A project name has to be specified." unless anns.has_key?(:project)
|
30
31
|
prefix_for_this = anns[:project].downcase.gsub(/ /, '_')
|
31
32
|
raise ArgumentError, "'#{prefix_for_this}' is a reserved prefix for this project." if namespaces.has_key?(prefix_for_this)
|
32
33
|
project_uri = 'http://pubannotation.org/projects/' + anns[:project]
|
@@ -34,6 +35,7 @@ class TAO::RDFizer
|
|
34
35
|
end
|
35
36
|
|
36
37
|
denotations = []
|
38
|
+
attributes = []
|
37
39
|
relations = []
|
38
40
|
spans = []
|
39
41
|
|
@@ -46,34 +48,57 @@ class TAO::RDFizer
|
|
46
48
|
end
|
47
49
|
|
48
50
|
# denotations and relations
|
49
|
-
_denotations = annotations[:denotations]
|
50
|
-
|
51
|
-
|
52
|
-
_relations = [] if _relations.nil?
|
51
|
+
_denotations = annotations[:denotations] || []
|
52
|
+
_attributes = annotations[:attributes] || []
|
53
|
+
_relations = annotations[:relations] || []
|
53
54
|
if @mode == :spans && annotations.has_key?(:tracks)
|
54
55
|
annotations[:tracks].each do |track|
|
55
56
|
_denotations += track[:denotations]
|
57
|
+
_attributes += track[:attributes]
|
56
58
|
_relations += track[:relations]
|
57
59
|
end
|
58
60
|
end
|
59
61
|
|
60
62
|
begin
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
63
|
+
if @mode == :annotations
|
64
|
+
# index attributes
|
65
|
+
attributesh = _attributes.inject({}) do |h, a|
|
66
|
+
if a[:pred].end_with?('_id')
|
67
|
+
subj = a[:subj]
|
68
|
+
h[subj] = [] unless h.has_key? subj
|
69
|
+
h[subj] << a[:obj]
|
70
|
+
end
|
71
|
+
h
|
72
|
+
end
|
68
73
|
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
+
# denotations preprocessing
|
75
|
+
_denotations.each do |d|
|
76
|
+
span_uri = "<#{text_uri}/spans/#{d[:span][:begin]}-#{d[:span][:end]}>"
|
77
|
+
d[:span_uri] = span_uri
|
78
|
+
d[:obj_uri] = "#{prefix_for_this}:#{text_id}-#{d[:id]}"
|
79
|
+
class_uris = (attributesh[d[:id]] || []).push(d[:obj])
|
80
|
+
d[:class_uris] = class_uris.map{|uri| find_uri(uri, namespaces, prefix_for_this)}
|
81
|
+
rescue ArgumentError => e
|
82
|
+
raise ArgumentError, "[#{sourcedb}-#{sourceid}-#{d[:id]}] " + e.message
|
83
|
+
end
|
84
|
+
|
85
|
+
# relations preprocessing
|
86
|
+
_relations.each do |r|
|
87
|
+
r[:subj_uri] = "#{prefix_for_this}:#{text_id}-#{r[:subj]}"
|
88
|
+
r[:obj_uri] = "#{prefix_for_this}:#{text_id}-#{r[:obj]}"
|
89
|
+
r[:pred_uri] = find_uri(r[:pred], namespaces, prefix_for_this)
|
90
|
+
rescue ArgumentError => e
|
91
|
+
raise ArgumentError, "[#{sourcedb}-#{sourceid}-#{r[:id]}] " + e.message
|
92
|
+
end
|
93
|
+
else
|
94
|
+
# denotations preprocessing
|
95
|
+
_denotations.each do |d|
|
96
|
+
span_uri = "<#{text_uri}/spans/#{d[:span][:begin]}-#{d[:span][:end]}>"
|
97
|
+
d[:span_uri] = span_uri
|
98
|
+
rescue ArgumentError => e
|
99
|
+
raise ArgumentError, "[#{sourcedb}-#{sourceid}-#{d[:id]}] " + e.message
|
100
|
+
end
|
74
101
|
end
|
75
|
-
rescue ArgumentError => e
|
76
|
-
raise ArgumentError, "[#{sourcedb}-#{sourceid}] " + e
|
77
102
|
end
|
78
103
|
|
79
104
|
unless @mode == :annotations
|
@@ -88,7 +113,7 @@ class TAO::RDFizer
|
|
88
113
|
s[:text] = text[s[:begin] ... s[:end]]
|
89
114
|
end
|
90
115
|
|
91
|
-
# index
|
116
|
+
# index spans
|
92
117
|
spanh = _spans.inject({}){|r, s| r[s[:span_uri]] = s; r}
|
93
118
|
|
94
119
|
# add denotation information
|
@@ -165,23 +190,23 @@ class TAO::RDFizer
|
|
165
190
|
raise ArgumentError, "A label including a whitespace character found: #{label}." if label.match(/\s/)
|
166
191
|
delimiter_position = label.index(':')
|
167
192
|
if !delimiter_position.nil? && namespaces.keys.include?(label[0...delimiter_position])
|
168
|
-
label
|
193
|
+
label.gsub('(', '\(').gsub(')', '\)') # brackets have to be escaped
|
169
194
|
elsif label =~ %r[^https?://]
|
170
195
|
"<#{label}>"
|
171
196
|
else
|
172
197
|
clabel = if label.match(/^\W+$/)
|
173
198
|
'SYM'
|
174
199
|
else
|
175
|
-
label.sub(/^\W+/, '').sub(
|
200
|
+
label.sub(/^\W+/, '').sub(/[^a-zA-Z0-9_)]+$/, '').gsub(/ +/, '_')
|
176
201
|
end
|
177
|
-
namespaces.has_key?('_base') ? "<#{clabel}>" : "#{prefix_for_this}:#{clabel}"
|
202
|
+
namespaces.has_key?('_base') ? "<#{clabel}>" : "#{prefix_for_this}:#{clabel.gsub('(', '\(').gsub(')', '\)')}"
|
178
203
|
end
|
179
204
|
end
|
180
205
|
|
181
206
|
ERB_ANNOTATIONS_TTL = <<~HEREDOC
|
182
207
|
<% denotations.each do |d| -%>
|
183
208
|
<%= d[:obj_uri] %> tao:denoted_by <%= d[:span_uri] %> ;
|
184
|
-
|
209
|
+
<%= d[:class_uris].map{|c| "\trdf:type " + c}.join(" ;\n") + " ." %>
|
185
210
|
<% end -%>
|
186
211
|
<%# relations -%>
|
187
212
|
<% relations.each do |r| -%>
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: tao_rdfizer
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.9.10
|
4
|
+
version: 0.10.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Jin-Dong Kim
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2020-12-24 00:00:00.000000000 Z
|
12
12
|
dependencies: []
|
13
13
|
description: It uses TAO (text annotation ontology) for representation of annotations
|
14
14
|
to text.
|
@@ -40,8 +40,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
40
40
|
- !ruby/object:Gem::Version
|
41
41
|
version: '0'
|
42
42
|
requirements: []
|
43
|
-
|
44
|
-
rubygems_version: 2.7.9
|
43
|
+
rubygems_version: 3.0.8
|
45
44
|
signing_key:
|
46
45
|
specification_version: 4
|
47
46
|
summary: A RDF statement generator for annotations in the PubAnnotation JSON format.
|