tao_rdfizer 0.11.3 → 0.12
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/tao_rdfizer/tao_rdfizer.rb +45 -55
- metadata +4 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 9f8e898ea335e5047e9b98c5f61d91f746ae35cdeae4e74c0fea729533f6c4a8
|
4
|
+
data.tar.gz: 9718f84c225451729468700d5ffc54952150de9f178c713b6bb82a13b440dfca
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 53c9ab169c713eb9cfaef0199c6497e39ba33ca4b9645699b2217376a266fa25fd60d70a97e47db14b10429760b6618614c140c170a60d85caf065ca3c3d8498
|
7
|
+
data.tar.gz: 5e9673f10325c1e10044be8d9c89819b21b60b056b0a9591eed83290763d038aefe49906a201022722a31b7bf8b50990b59abd2fd8439d7d1d00da48784075cc
|
@@ -14,14 +14,14 @@ class TAO::RDFizer
|
|
14
14
|
else
|
15
15
|
ERB_ANNOTATIONS_TTL
|
16
16
|
end
|
17
|
-
@tao_ttl_erb = ERB.new(template,
|
18
|
-
@prefix_ttl_erb = ERB.new(ERB_PREFIXES_TTL,
|
17
|
+
@tao_ttl_erb = ERB.new(template, trim_mode: '-')
|
18
|
+
@prefix_ttl_erb = ERB.new(ERB_PREFIXES_TTL, trim_mode: '-')
|
19
19
|
end
|
20
20
|
|
21
21
|
def rdfize(annotations_col, options = nil)
|
22
22
|
options ||= {}
|
23
|
-
only_prefixes = options
|
24
|
-
with_prefixes = options
|
23
|
+
only_prefixes = options[:only_prefixes] == true
|
24
|
+
with_prefixes = options[:with_prefixes] != true
|
25
25
|
|
26
26
|
# check the format
|
27
27
|
annotations_col.each do |annotations|
|
@@ -29,35 +29,33 @@ class TAO::RDFizer
|
|
29
29
|
end
|
30
30
|
|
31
31
|
# namespaces
|
32
|
-
namespaces =
|
33
|
-
|
34
|
-
anns = annotations_col.first
|
35
|
-
anns[:namespaces].each {|n| namespaces[n[:prefix]] = n[:uri]} unless anns[:namespaces].nil?
|
32
|
+
namespaces = get_namespaces(annotations_col.first)
|
36
33
|
|
37
34
|
prefixes_ttl = @prefix_ttl_erb.result_with_hash(namespaces:namespaces) if only_prefixes || with_prefixes
|
38
35
|
|
39
|
-
if only_prefixes
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
if with_prefixes
|
44
|
-
prefixes_ttl + annotations_ttl
|
45
|
-
else
|
46
|
-
annotations_ttl
|
47
|
-
end
|
48
|
-
end
|
36
|
+
return prefixes_ttl if only_prefixes
|
37
|
+
|
38
|
+
annotations_ttl = get_annotations_ttl(annotations_col, namespaces)
|
39
|
+
with_prefixes ? prefixes_ttl + annotations_ttl : annotations_ttl
|
49
40
|
end
|
50
41
|
|
51
42
|
private
|
52
43
|
|
53
|
-
def
|
54
|
-
|
44
|
+
def get_namespaces(annotations)
|
45
|
+
return {} if annotations[:namespaces].nil?
|
46
|
+
|
47
|
+
annotations[:namespaces].each_with_object({}) do |n, namespaces|
|
48
|
+
namespaces[n[:prefix]] = n[:uri]
|
49
|
+
end
|
50
|
+
end
|
55
51
|
|
52
|
+
def get_annotations_ttl(annotations_col, namespaces)
|
56
53
|
unless @mode ==:spans
|
54
|
+
anns = annotations_col.first
|
57
55
|
raise ArgumentError, "A project name has to be specified." unless anns.has_key?(:project)
|
58
56
|
prefix_for_this = anns[:project].downcase.gsub(/ /, '_')
|
59
57
|
raise ArgumentError, "'#{prefix_for_this}' is a reserved prefix for this project." if namespaces.has_key?(prefix_for_this)
|
60
|
-
project_uri = '
|
58
|
+
project_uri = 'https://pubannotation.org/projects/' + anns[:project]
|
61
59
|
namespaces[prefix_for_this] = project_uri + '/'
|
62
60
|
end
|
63
61
|
|
@@ -70,12 +68,14 @@ class TAO::RDFizer
|
|
70
68
|
text = annotations[:text]
|
71
69
|
text_uri = annotations[:target]
|
72
70
|
text_id = begin
|
73
|
-
sourcedb, sourceid
|
74
|
-
|
71
|
+
sourcedb, sourceid = get_target_info(text_uri)
|
72
|
+
"#{sourcedb}-#{sourceid}"
|
75
73
|
end
|
76
74
|
|
77
75
|
# denotations and relations
|
78
76
|
_denotations = annotations[:denotations] || []
|
77
|
+
_blocks = annotations[:blocks] || []
|
78
|
+
_denotations += _blocks
|
79
79
|
_attributes = annotations[:attributes] || []
|
80
80
|
_relations = annotations[:relations] || []
|
81
81
|
if @mode == :spans && annotations.has_key?(:tracks)
|
@@ -153,37 +153,29 @@ class TAO::RDFizer
|
|
153
153
|
end
|
154
154
|
end
|
155
155
|
|
156
|
-
_spans.
|
156
|
+
_spans.sort_by! { |s| [s[:begin], -s[:end]] }
|
157
157
|
|
158
158
|
## begin indexing
|
159
159
|
len = text.length
|
160
160
|
num = _spans.length
|
161
161
|
|
162
|
-
#
|
163
|
-
|
164
|
-
|
165
|
-
|
166
|
-
end
|
162
|
+
# create the indexes for followings and children
|
163
|
+
_spans.each_with_index do |span, i|
|
164
|
+
span[:followings] = []
|
165
|
+
span[:children] = []
|
167
166
|
|
168
|
-
(0 ... num).each do |i|
|
169
|
-
# find the first following span
|
170
167
|
j = i + 1
|
171
|
-
|
172
|
-
|
173
|
-
unless include_parent?(_spans[i][:children], _spans[j])
|
174
|
-
_spans[i][:children] << _spans[j]
|
175
|
-
end
|
168
|
+
while j < num && _spans[j][:begin] < span[:end]
|
169
|
+
span[:children] << _spans[j] unless include_parent?(span[:children], _spans[j])
|
176
170
|
j += 1
|
177
171
|
end
|
178
172
|
|
179
|
-
|
180
|
-
fp = _spans[i][:end]
|
173
|
+
fp = span[:end]
|
181
174
|
fp += 1 while fp < len && text[fp].match(/\s/)
|
182
175
|
next if fp == len
|
183
176
|
|
184
|
-
# index adjacent spans
|
185
177
|
while j < num && _spans[j][:begin] == fp
|
186
|
-
|
178
|
+
span[:followings] << _spans[j]
|
187
179
|
j += 1
|
188
180
|
end
|
189
181
|
end
|
@@ -206,28 +198,26 @@ class TAO::RDFizer
|
|
206
198
|
def get_target_info (text_uri)
|
207
199
|
sourcedb = (text_uri =~ %r|/sourcedb/([^/]+)|)? $1 : nil
|
208
200
|
sourceid = (text_uri =~ %r|/sourceid/([^/]+)|)? $1 : nil
|
209
|
-
divid = (text_uri =~ %r|/divs/([^/]+)|)? $1 : nil
|
210
201
|
|
211
|
-
return sourcedb, sourceid
|
202
|
+
return sourcedb, sourceid
|
212
203
|
end
|
213
204
|
|
214
|
-
def find_uri
|
205
|
+
def find_uri(label, namespaces, prefix_for_this)
|
206
|
+
@uri_cache ||= {}
|
207
|
+
|
208
|
+
return @uri_cache[label] if @uri_cache.key?(label)
|
209
|
+
|
215
210
|
if label.match(/\s/)
|
216
|
-
|
217
|
-
label.gsub(/\s/, '_')
|
211
|
+
label.gsub!(/\s/, '_')
|
218
212
|
end
|
219
213
|
delimiter_position = label.index(':')
|
220
|
-
if
|
221
|
-
label.gsub('(', '\(').gsub(')', '\)')
|
214
|
+
if delimiter_position && namespaces.key?(label[0...delimiter_position])
|
215
|
+
@uri_cache[label] = label.gsub('(', '\(').gsub(')', '\)')
|
222
216
|
elsif label =~ %r[^https?://]
|
223
|
-
"<#{label}>"
|
217
|
+
@uri_cache[label] = "<#{label}>"
|
224
218
|
else
|
225
|
-
clabel =
|
226
|
-
|
227
|
-
else
|
228
|
-
label.sub(/^\W+/, '').sub(/[^a-zA-Z0-9_)]+$/, '').gsub(/ +/, '_')
|
229
|
-
end
|
230
|
-
namespaces.has_key?('_base') ? "<#{clabel}>" : "#{prefix_for_this}:#{clabel.gsub('(', '\(').gsub(')', '\)')}"
|
219
|
+
clabel = label.match(/^\W+$/) ? 'SYM' : label.sub(/^\W+/, '').sub(/[^a-zA-Z0-9_)]+$/, '').gsub(/ +/, '_')
|
220
|
+
@uri_cache[label] = namespaces.key?('_base') ? "<#{clabel}>" : "#{prefix_for_this}:#{clabel.gsub('(', '\(').gsub(')', '\)')}"
|
231
221
|
end
|
232
222
|
end
|
233
223
|
|
@@ -274,7 +264,7 @@ class TAO::RDFizer
|
|
274
264
|
# variable: namespaces
|
275
265
|
ERB_PREFIXES_TTL = <<~HEREDOC
|
276
266
|
@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
|
277
|
-
@prefix tao: <
|
267
|
+
@prefix tao: <https://pubannotation.org/ontology/tao.owl#> .
|
278
268
|
<%# namespaces -%>
|
279
269
|
<% namespaces.each_key do |p| -%>
|
280
270
|
<% if p == '_base' -%>
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: tao_rdfizer
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.11.3
|
4
|
+
version: '0.12'
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Jin-Dong Kim
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2024-12-01 00:00:00.000000000 Z
|
12
12
|
dependencies: []
|
13
13
|
description: It uses TAO (text annotation ontology) for representation of annotations
|
14
14
|
to text.
|
@@ -33,14 +33,14 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
33
33
|
requirements:
|
34
34
|
- - ">="
|
35
35
|
- !ruby/object:Gem::Version
|
36
|
-
version:
|
36
|
+
version: 3.0.0
|
37
37
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
38
38
|
requirements:
|
39
39
|
- - ">="
|
40
40
|
- !ruby/object:Gem::Version
|
41
41
|
version: '0'
|
42
42
|
requirements: []
|
43
|
-
rubygems_version: 3.
|
43
|
+
rubygems_version: 3.5.11
|
44
44
|
signing_key:
|
45
45
|
specification_version: 4
|
46
46
|
summary: A RDF statement generator for annotations in the PubAnnotation JSON format.
|