tao_rdfizer 0.9.11 → 0.10.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/tao_rdfizer/tao_rdfizer.rb +55 -23
- metadata +3 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 112cb7c3d78d77ca2df491e26f05eeb52334ed291483bb6bfa586bf707304608
|
4
|
+
data.tar.gz: e4bbdf5b014fed6779dc79f3b786a2d9d066a2a4dd9a8a1e8a87dec466d908ae
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: a291bf21a29a638ec0c3b42b16ff06e7530a8d7245965fb0901f7b4452358d6ecc7876a89c8d8c244a89fb7f47276c9105fb9b301311c09e51b86a5c0e1576dd
|
7
|
+
data.tar.gz: ad6a621f7a38146ee0ca0eccaa1b2c709a315b2d39bd000c801c4249f7fee0ecf22600fef4ff8baeb6160cf155e4fa033034d4aff5fa1d0d8cf4e9b78d16a5c8
|
@@ -19,7 +19,12 @@ class TAO::RDFizer
|
|
19
19
|
@prefix_ttl_erb = ERB.new(ERB_PREFIXES_TTL, nil, '-')
|
20
20
|
end
|
21
21
|
|
22
|
-
def rdfize(annotations_col)
|
22
|
+
def rdfize(annotations_col, with_prefix = true)
|
23
|
+
# check the format
|
24
|
+
annotations_col.each do |annotations|
|
25
|
+
raise "'target' is missing" unless annotations.has_key? :target
|
26
|
+
end
|
27
|
+
|
23
28
|
# namespaces
|
24
29
|
namespaces = {}
|
25
30
|
|
@@ -27,6 +32,7 @@ class TAO::RDFizer
|
|
27
32
|
anns[:namespaces].each {|n| namespaces[n[:prefix]] = n[:uri]} unless anns[:namespaces].nil?
|
28
33
|
|
29
34
|
unless @mode ==:spans
|
35
|
+
raise ArgumentError, "A project name has to be specified." unless anns.has_key?(:project)
|
30
36
|
prefix_for_this = anns[:project].downcase.gsub(/ /, '_')
|
31
37
|
raise ArgumentError, "'#{prefix_for_this}' is a reserved prefix for this project." if namespaces.has_key?(prefix_for_this)
|
32
38
|
project_uri = 'http://pubannotation.org/projects/' + anns[:project]
|
@@ -34,6 +40,7 @@ class TAO::RDFizer
|
|
34
40
|
end
|
35
41
|
|
36
42
|
denotations = []
|
43
|
+
attributes = []
|
37
44
|
relations = []
|
38
45
|
spans = []
|
39
46
|
|
@@ -46,34 +53,57 @@ class TAO::RDFizer
|
|
46
53
|
end
|
47
54
|
|
48
55
|
# denotations and relations
|
49
|
-
_denotations = annotations[:denotations]
|
50
|
-
|
51
|
-
|
52
|
-
_relations = [] if _relations.nil?
|
56
|
+
_denotations = annotations[:denotations] || []
|
57
|
+
_attributes = annotations[:attributes] || []
|
58
|
+
_relations = annotations[:relations] || []
|
53
59
|
if @mode == :spans && annotations.has_key?(:tracks)
|
54
60
|
annotations[:tracks].each do |track|
|
55
61
|
_denotations += track[:denotations]
|
62
|
+
_attributes += track[:attributes]
|
56
63
|
_relations += track[:relations]
|
57
64
|
end
|
58
65
|
end
|
59
66
|
|
60
67
|
begin
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
+
if @mode == :annotations
|
69
|
+
# index attributes
|
70
|
+
attributesh = _attributes.inject({}) do |h, a|
|
71
|
+
if a[:pred].end_with?('_id')
|
72
|
+
subj = a[:subj]
|
73
|
+
h[subj] = [] unless h.has_key? subj
|
74
|
+
h[subj] << a[:obj]
|
75
|
+
end
|
76
|
+
h
|
77
|
+
end
|
78
|
+
|
79
|
+
# denotations preprocessing
|
80
|
+
_denotations.each do |d|
|
81
|
+
span_uri = "<#{text_uri}/spans/#{d[:span][:begin]}-#{d[:span][:end]}>"
|
82
|
+
d[:span_uri] = span_uri
|
83
|
+
d[:obj_uri] = "#{prefix_for_this}:#{text_id}-#{d[:id]}"
|
84
|
+
class_uris = (attributesh[d[:id]] || []).push(d[:obj])
|
85
|
+
d[:class_uris] = class_uris.map{|uri| find_uri(uri, namespaces, prefix_for_this)}
|
86
|
+
rescue ArgumentError => e
|
87
|
+
raise ArgumentError, "[#{sourcedb}-#{sourceid}-#{d[:id]}] " + e.message
|
88
|
+
end
|
68
89
|
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
90
|
+
# relations preprocessing
|
91
|
+
_relations.each do |r|
|
92
|
+
r[:subj_uri] = "#{prefix_for_this}:#{text_id}-#{r[:subj]}"
|
93
|
+
r[:obj_uri] = "#{prefix_for_this}:#{text_id}-#{r[:obj]}"
|
94
|
+
r[:pred_uri] = find_uri(r[:pred], namespaces, prefix_for_this)
|
95
|
+
rescue ArgumentError => e
|
96
|
+
raise ArgumentError, "[#{sourcedb}-#{sourceid}-#{r[:id]}] " + e.message
|
97
|
+
end
|
98
|
+
else
|
99
|
+
# denotations preprocessing
|
100
|
+
_denotations.each do |d|
|
101
|
+
span_uri = "<#{text_uri}/spans/#{d[:span][:begin]}-#{d[:span][:end]}>"
|
102
|
+
d[:span_uri] = span_uri
|
103
|
+
rescue ArgumentError => e
|
104
|
+
raise ArgumentError, "[#{sourcedb}-#{sourceid}-#{d[:id]}] " + e.message
|
105
|
+
end
|
74
106
|
end
|
75
|
-
rescue ArgumentError => e
|
76
|
-
raise ArgumentError, "[#{sourcedb}-#{sourceid}] " + e
|
77
107
|
end
|
78
108
|
|
79
109
|
unless @mode == :annotations
|
@@ -88,7 +118,7 @@ class TAO::RDFizer
|
|
88
118
|
s[:text] = text[s[:begin] ... s[:end]]
|
89
119
|
end
|
90
120
|
|
91
|
-
# index
|
121
|
+
# index spans
|
92
122
|
spanh = _spans.inject({}){|r, s| r[s[:span_uri]] = s; r}
|
93
123
|
|
94
124
|
# add denotation information
|
@@ -142,7 +172,9 @@ class TAO::RDFizer
|
|
142
172
|
spans += _spans unless @mode == :annotations
|
143
173
|
end
|
144
174
|
|
145
|
-
ttl
|
175
|
+
ttl = ''
|
176
|
+
ttl += @prefix_ttl_erb.result(binding) if with_prefix
|
177
|
+
ttl += @tao_ttl_erb.result(binding)
|
146
178
|
end
|
147
179
|
|
148
180
|
private
|
@@ -172,7 +204,7 @@ class TAO::RDFizer
|
|
172
204
|
clabel = if label.match(/^\W+$/)
|
173
205
|
'SYM'
|
174
206
|
else
|
175
|
-
label.sub(/^\W+/, '').sub(
|
207
|
+
label.sub(/^\W+/, '').sub(/[^a-zA-Z0-9_)]+$/, '').gsub(/ +/, '_')
|
176
208
|
end
|
177
209
|
namespaces.has_key?('_base') ? "<#{clabel}>" : "#{prefix_for_this}:#{clabel.gsub('(', '\(').gsub(')', '\)')}"
|
178
210
|
end
|
@@ -181,7 +213,7 @@ class TAO::RDFizer
|
|
181
213
|
ERB_ANNOTATIONS_TTL = <<~HEREDOC
|
182
214
|
<% denotations.each do |d| -%>
|
183
215
|
<%= d[:obj_uri] %> tao:denoted_by <%= d[:span_uri] %> ;
|
184
|
-
|
216
|
+
<%= d[:class_uris].map{|c| "\trdf:type " + c}.join(" ;\n") + " ." %>
|
185
217
|
<% end -%>
|
186
218
|
<%# relations -%>
|
187
219
|
<% relations.each do |r| -%>
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: tao_rdfizer
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.9.11
|
4
|
+
version: 0.10.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Jin-Dong Kim
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2021-05-15 00:00:00.000000000 Z
|
12
12
|
dependencies: []
|
13
13
|
description: It uses TAO (text annotation ontology) for representation of annotations
|
14
14
|
to text.
|
@@ -40,8 +40,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
40
40
|
- !ruby/object:Gem::Version
|
41
41
|
version: '0'
|
42
42
|
requirements: []
|
43
|
-
|
44
|
-
rubygems_version: 2.7.9
|
43
|
+
rubygems_version: 3.0.9
|
45
44
|
signing_key:
|
46
45
|
specification_version: 4
|
47
46
|
summary: A RDF statement generator for annotations in the PubAnnotation JSON format.
|