tao_rdfizer 0.11.3 → 0.11.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/tao_rdfizer/tao_rdfizer.rb +43 -55
- metadata +4 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 59d7999a921ef33104728a1f86edbeb7f20161641b37a53435daf316772ffed8
|
4
|
+
data.tar.gz: d6cd66e38eececa7b7cbb251686d148a4924da16ad751eea3af2cef21b00121a
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 4e98ac931a0f2913f44e8e234222f55c93ff9bbd76173787393fcf1e866a14b7707168c8446f33cfd00f208599439cacdd67b4b6296c9ab8a1288a3963fd4f0c
|
7
|
+
data.tar.gz: 788aaf5c427d44783edadfc30f84a8d2cc7aac99641575dd3999f780520c374cf5c443fc9a5dd656424a4291940861fa268a37b6ca4f3dab9a2ae196f6df2bc8
|
@@ -14,14 +14,14 @@ class TAO::RDFizer
|
|
14
14
|
else
|
15
15
|
ERB_ANNOTATIONS_TTL
|
16
16
|
end
|
17
|
-
@tao_ttl_erb = ERB.new(template,
|
18
|
-
@prefix_ttl_erb = ERB.new(ERB_PREFIXES_TTL,
|
17
|
+
@tao_ttl_erb = ERB.new(template, trim_mode: '-')
|
18
|
+
@prefix_ttl_erb = ERB.new(ERB_PREFIXES_TTL, trim_mode: '-')
|
19
19
|
end
|
20
20
|
|
21
21
|
def rdfize(annotations_col, options = nil)
|
22
22
|
options ||= {}
|
23
|
-
only_prefixes = options
|
24
|
-
with_prefixes = options
|
23
|
+
only_prefixes = options[:only_prefixes] == true
|
24
|
+
with_prefixes = options[:with_prefixes] != true
|
25
25
|
|
26
26
|
# check the format
|
27
27
|
annotations_col.each do |annotations|
|
@@ -29,35 +29,33 @@ class TAO::RDFizer
|
|
29
29
|
end
|
30
30
|
|
31
31
|
# namespaces
|
32
|
-
namespaces =
|
33
|
-
|
34
|
-
anns = annotations_col.first
|
35
|
-
anns[:namespaces].each {|n| namespaces[n[:prefix]] = n[:uri]} unless anns[:namespaces].nil?
|
32
|
+
namespaces = get_namespaces(annotations_col.first)
|
36
33
|
|
37
34
|
prefixes_ttl = @prefix_ttl_erb.result_with_hash(namespaces:namespaces) if only_prefixes || with_prefixes
|
38
35
|
|
39
|
-
if only_prefixes
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
if with_prefixes
|
44
|
-
prefixes_ttl + annotations_ttl
|
45
|
-
else
|
46
|
-
annotations_ttl
|
47
|
-
end
|
48
|
-
end
|
36
|
+
return prefixes_ttl if only_prefixes
|
37
|
+
|
38
|
+
annotations_ttl = get_annotations_ttl(annotations_col, namespaces)
|
39
|
+
with_prefixes ? prefixes_ttl + annotations_ttl : annotations_ttl
|
49
40
|
end
|
50
41
|
|
51
42
|
private
|
52
43
|
|
53
|
-
def
|
54
|
-
|
44
|
+
def get_namespaces(annotations)
|
45
|
+
return {} if annotations[:namespaces].nil?
|
46
|
+
|
47
|
+
annotations[:namespaces].each_with_object({}) do |n, namespaces|
|
48
|
+
namespaces[n[:prefix]] = n[:uri]
|
49
|
+
end
|
50
|
+
end
|
55
51
|
|
52
|
+
def get_annotations_ttl(annotations_col, namespaces)
|
56
53
|
unless @mode ==:spans
|
54
|
+
anns = annotations_col.first
|
57
55
|
raise ArgumentError, "A project name has to be specified." unless anns.has_key?(:project)
|
58
56
|
prefix_for_this = anns[:project].downcase.gsub(/ /, '_')
|
59
57
|
raise ArgumentError, "'#{prefix_for_this}' is a reserved prefix for this project." if namespaces.has_key?(prefix_for_this)
|
60
|
-
project_uri = '
|
58
|
+
project_uri = 'https://pubannotation.org/projects/' + anns[:project]
|
61
59
|
namespaces[prefix_for_this] = project_uri + '/'
|
62
60
|
end
|
63
61
|
|
@@ -70,8 +68,8 @@ class TAO::RDFizer
|
|
70
68
|
text = annotations[:text]
|
71
69
|
text_uri = annotations[:target]
|
72
70
|
text_id = begin
|
73
|
-
sourcedb, sourceid
|
74
|
-
|
71
|
+
sourcedb, sourceid = get_target_info(text_uri)
|
72
|
+
"#{sourcedb}-#{sourceid}"
|
75
73
|
end
|
76
74
|
|
77
75
|
# denotations and relations
|
@@ -153,37 +151,29 @@ class TAO::RDFizer
|
|
153
151
|
end
|
154
152
|
end
|
155
153
|
|
156
|
-
_spans.
|
154
|
+
_spans.sort_by! { |s| [s[:begin], -s[:end]] }
|
157
155
|
|
158
156
|
## begin indexing
|
159
157
|
len = text.length
|
160
158
|
num = _spans.length
|
161
159
|
|
162
|
-
#
|
163
|
-
|
164
|
-
|
165
|
-
|
166
|
-
end
|
160
|
+
# create the indexes for followings and children
|
161
|
+
_spans.each_with_index do |span, i|
|
162
|
+
span[:followings] = []
|
163
|
+
span[:children] = []
|
167
164
|
|
168
|
-
(0 ... num).each do |i|
|
169
|
-
# find the first following span
|
170
165
|
j = i + 1
|
171
|
-
|
172
|
-
|
173
|
-
unless include_parent?(_spans[i][:children], _spans[j])
|
174
|
-
_spans[i][:children] << _spans[j]
|
175
|
-
end
|
166
|
+
while j < num && _spans[j][:begin] < span[:end]
|
167
|
+
span[:children] << _spans[j] unless include_parent?(span[:children], _spans[j])
|
176
168
|
j += 1
|
177
169
|
end
|
178
170
|
|
179
|
-
|
180
|
-
fp = _spans[i][:end]
|
171
|
+
fp = span[:end]
|
181
172
|
fp += 1 while fp < len && text[fp].match(/\s/)
|
182
173
|
next if fp == len
|
183
174
|
|
184
|
-
# index adjacent spans
|
185
175
|
while j < num && _spans[j][:begin] == fp
|
186
|
-
|
176
|
+
span[:followings] << _spans[j]
|
187
177
|
j += 1
|
188
178
|
end
|
189
179
|
end
|
@@ -206,28 +196,26 @@ class TAO::RDFizer
|
|
206
196
|
def get_target_info (text_uri)
|
207
197
|
sourcedb = (text_uri =~ %r|/sourcedb/([^/]+)|)? $1 : nil
|
208
198
|
sourceid = (text_uri =~ %r|/sourceid/([^/]+)|)? $1 : nil
|
209
|
-
divid = (text_uri =~ %r|/divs/([^/]+)|)? $1 : nil
|
210
199
|
|
211
|
-
return sourcedb, sourceid
|
200
|
+
return sourcedb, sourceid
|
212
201
|
end
|
213
202
|
|
214
|
-
def find_uri
|
203
|
+
def find_uri(label, namespaces, prefix_for_this)
|
204
|
+
@uri_cache ||= {}
|
205
|
+
|
206
|
+
return @uri_cache[label] if @uri_cache.key?(label)
|
207
|
+
|
215
208
|
if label.match(/\s/)
|
216
|
-
|
217
|
-
label.gsub(/\s/, '_')
|
209
|
+
label.gsub!(/\s/, '_')
|
218
210
|
end
|
219
211
|
delimiter_position = label.index(':')
|
220
|
-
if
|
221
|
-
label.gsub('(', '\(').gsub(')', '\)')
|
212
|
+
if delimiter_position && namespaces.key?(label[0...delimiter_position])
|
213
|
+
@uri_cache[label] = label.gsub('(', '\(').gsub(')', '\)')
|
222
214
|
elsif label =~ %r[^https?://]
|
223
|
-
"<#{label}>"
|
215
|
+
@uri_cache[label] = "<#{label}>"
|
224
216
|
else
|
225
|
-
clabel =
|
226
|
-
|
227
|
-
else
|
228
|
-
label.sub(/^\W+/, '').sub(/[^a-zA-Z0-9_)]+$/, '').gsub(/ +/, '_')
|
229
|
-
end
|
230
|
-
namespaces.has_key?('_base') ? "<#{clabel}>" : "#{prefix_for_this}:#{clabel.gsub('(', '\(').gsub(')', '\)')}"
|
217
|
+
clabel = label.match(/^\W+$/) ? 'SYM' : label.sub(/^\W+/, '').sub(/[^a-zA-Z0-9_)]+$/, '').gsub(/ +/, '_')
|
218
|
+
@uri_cache[label] = namespaces.key?('_base') ? "<#{clabel}>" : "#{prefix_for_this}:#{clabel.gsub('(', '\(').gsub(')', '\)')}"
|
231
219
|
end
|
232
220
|
end
|
233
221
|
|
@@ -274,7 +262,7 @@ class TAO::RDFizer
|
|
274
262
|
# variable: namespaces
|
275
263
|
ERB_PREFIXES_TTL = <<~HEREDOC
|
276
264
|
@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
|
277
|
-
@prefix tao: <
|
265
|
+
@prefix tao: <https://pubannotation.org/ontology/tao.owl#> .
|
278
266
|
<%# namespaces -%>
|
279
267
|
<% namespaces.each_key do |p| -%>
|
280
268
|
<% if p == '_base' -%>
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: tao_rdfizer
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.11.
|
4
|
+
version: 0.11.4
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Jin-Dong Kim
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2024-09-18 00:00:00.000000000 Z
|
12
12
|
dependencies: []
|
13
13
|
description: It uses TAO (text annotation ontology) for representation of annotations
|
14
14
|
to text.
|
@@ -33,14 +33,14 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
33
33
|
requirements:
|
34
34
|
- - ">="
|
35
35
|
- !ruby/object:Gem::Version
|
36
|
-
version:
|
36
|
+
version: 3.0.0
|
37
37
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
38
38
|
requirements:
|
39
39
|
- - ">="
|
40
40
|
- !ruby/object:Gem::Version
|
41
41
|
version: '0'
|
42
42
|
requirements: []
|
43
|
-
rubygems_version: 3.
|
43
|
+
rubygems_version: 3.5.11
|
44
44
|
signing_key:
|
45
45
|
specification_version: 4
|
46
46
|
summary: A RDF statement generator for annotations in the PubAnnotation JSON format.
|