tao_rdfizer 0.9.10 → 0.10.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (3) hide show
  1. checksums.yaml +4 -4
  2. data/lib/tao_rdfizer/tao_rdfizer.rb +48 -23
  3. metadata +3 -4
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 839f30915cc8896477342a8cfdb87d8d4f23d8ffbbc133b9b0b087939050befe
4
- data.tar.gz: bfa3efcbae03859f8a778afbf81f37daffec3467b537671cf5282c0905ed1d5f
3
+ metadata.gz: 4baf53459b357e899ab4e569f15ba00e4240653641912488c2478ef0a65ac2bf
4
+ data.tar.gz: cd7d1a7121abbd4f725b1cbf5dff8cf28db27efdcab025e1c43305d0350157ab
5
5
  SHA512:
6
- metadata.gz: 2a9744b372d7a9774774262a17771c43533b44c3a95ad19cd7e60add25d6135afb3c85ecad8d6933ffdff61b39632be0386ecf9d85340f1f0bea1efc9176e8d5
7
- data.tar.gz: 77f1d13bf567b6e5ed28668034da90e0810520358e2aa2d27dc747ce78f090316b1519be8f1b2a30050fce5a20408b46a932887c955979a217304c4a53cd709e
6
+ metadata.gz: ba755e5d41a06b36d592dc52261ed65b9018c47ed4da5b29fa0bf2a49196e0ae29c046c1cf463be9bbaad510ddca2bed464ce17898c58480e3258668374a7675
7
+ data.tar.gz: cb71ac538c0573812317caa83a54b3069dca0221d85388dd2ede2e1251b4d820379e43ac45744dbc450faf4a3dcd0db542832075a4febdc03389d956a7cbd9ff
@@ -27,6 +27,7 @@ class TAO::RDFizer
27
27
  anns[:namespaces].each {|n| namespaces[n[:prefix]] = n[:uri]} unless anns[:namespaces].nil?
28
28
 
29
29
  unless @mode ==:spans
30
+ raise ArgumentError, "A project name has to be specified." unless anns.has_key?(:project)
30
31
  prefix_for_this = anns[:project].downcase.gsub(/ /, '_')
31
32
  raise ArgumentError, "'#{prefix_for_this}' is a reserved prefix for this project." if namespaces.has_key?(prefix_for_this)
32
33
  project_uri = 'http://pubannotation.org/projects/' + anns[:project]
@@ -34,6 +35,7 @@ class TAO::RDFizer
34
35
  end
35
36
 
36
37
  denotations = []
38
+ attributes = []
37
39
  relations = []
38
40
  spans = []
39
41
 
@@ -46,34 +48,57 @@ class TAO::RDFizer
46
48
  end
47
49
 
48
50
  # denotations and relations
49
- _denotations = annotations[:denotations]
50
- _relations = annotations[:relations]
51
- _denotations = [] if _denotations.nil?
52
- _relations = [] if _relations.nil?
51
+ _denotations = annotations[:denotations] || []
52
+ _attributes = annotations[:attributes] || []
53
+ _relations = annotations[:relations] || []
53
54
  if @mode == :spans && annotations.has_key?(:tracks)
54
55
  annotations[:tracks].each do |track|
55
56
  _denotations += track[:denotations]
57
+ _attributes += track[:attributes]
56
58
  _relations += track[:relations]
57
59
  end
58
60
  end
59
61
 
60
62
  begin
61
- # denotations preprocessing
62
- _denotations.each do |d|
63
- span_uri = "<#{text_uri}/spans/#{d[:span][:begin]}-#{d[:span][:end]}>"
64
- d[:span_uri] = span_uri
65
- d[:obj_uri] = "#{prefix_for_this}:#{text_id}-#{d[:id]}"
66
- d[:cls_uri] = find_uri(d[:obj], namespaces, prefix_for_this)
67
- end
63
+ if @mode == :annotations
64
+ # index attributes
65
+ attributesh = _attributes.inject({}) do |h, a|
66
+ if a[:pred].end_with?('_id')
67
+ subj = a[:subj]
68
+ h[subj] = [] unless h.has_key? subj
69
+ h[subj] << a[:obj]
70
+ end
71
+ h
72
+ end
68
73
 
69
- # relations preprocessing
70
- _relations.each do |r|
71
- r[:subj_uri] = "#{prefix_for_this}:#{text_id}-#{r[:subj]}"
72
- r[:obj_uri] = "#{prefix_for_this}:#{text_id}-#{r[:obj]}"
73
- r[:pred_uri] = find_uri(r[:pred], namespaces, prefix_for_this)
74
+ # denotations preprocessing
75
+ _denotations.each do |d|
76
+ span_uri = "<#{text_uri}/spans/#{d[:span][:begin]}-#{d[:span][:end]}>"
77
+ d[:span_uri] = span_uri
78
+ d[:obj_uri] = "#{prefix_for_this}:#{text_id}-#{d[:id]}"
79
+ class_uris = (attributesh[d[:id]] || []).push(d[:obj])
80
+ d[:class_uris] = class_uris.map{|uri| find_uri(uri, namespaces, prefix_for_this)}
81
+ rescue ArgumentError => e
82
+ raise ArgumentError, "[#{sourcedb}-#{sourceid}-#{d[:id]}] " + e.message
83
+ end
84
+
85
+ # relations preprocessing
86
+ _relations.each do |r|
87
+ r[:subj_uri] = "#{prefix_for_this}:#{text_id}-#{r[:subj]}"
88
+ r[:obj_uri] = "#{prefix_for_this}:#{text_id}-#{r[:obj]}"
89
+ r[:pred_uri] = find_uri(r[:pred], namespaces, prefix_for_this)
90
+ rescue ArgumentError => e
91
+ raise ArgumentError, "[#{sourcedb}-#{sourceid}-#{r[:id]}] " + e.message
92
+ end
93
+ else
94
+ # denotations preprocessing
95
+ _denotations.each do |d|
96
+ span_uri = "<#{text_uri}/spans/#{d[:span][:begin]}-#{d[:span][:end]}>"
97
+ d[:span_uri] = span_uri
98
+ rescue ArgumentError => e
99
+ raise ArgumentError, "[#{sourcedb}-#{sourceid}-#{d[:id]}] " + e.message
100
+ end
74
101
  end
75
- rescue ArgumentError => e
76
- raise ArgumentError, "[#{sourcedb}-#{sourceid}] " + e
77
102
  end
78
103
 
79
104
  unless @mode == :annotations
@@ -88,7 +113,7 @@ class TAO::RDFizer
88
113
  s[:text] = text[s[:begin] ... s[:end]]
89
114
  end
90
115
 
91
- # index
116
+ # index spans
92
117
  spanh = _spans.inject({}){|r, s| r[s[:span_uri]] = s; r}
93
118
 
94
119
  # add denotation information
@@ -165,23 +190,23 @@ class TAO::RDFizer
165
190
  raise ArgumentError, "A label including a whitespace character found: #{label}." if label.match(/\s/)
166
191
  delimiter_position = label.index(':')
167
192
  if !delimiter_position.nil? && namespaces.keys.include?(label[0...delimiter_position])
168
- label
193
+ label.gsub('(', '\(').gsub(')', '\)') # brackets have to be escaped
169
194
  elsif label =~ %r[^https?://]
170
195
  "<#{label}>"
171
196
  else
172
197
  clabel = if label.match(/^\W+$/)
173
198
  'SYM'
174
199
  else
175
- label.sub(/^\W+/, '').sub(/\W+$/, '').gsub(/ +/, '_')
200
+ label.sub(/^\W+/, '').sub(/[^a-zA-Z0-9_)]+$/, '').gsub(/ +/, '_')
176
201
  end
177
- namespaces.has_key?('_base') ? "<#{clabel}>" : "#{prefix_for_this}:#{clabel}"
202
+ namespaces.has_key?('_base') ? "<#{clabel}>" : "#{prefix_for_this}:#{clabel.gsub('(', '\(').gsub(')', '\)')}"
178
203
  end
179
204
  end
180
205
 
181
206
  ERB_ANNOTATIONS_TTL = <<~HEREDOC
182
207
  <% denotations.each do |d| -%>
183
208
  <%= d[:obj_uri] %> tao:denoted_by <%= d[:span_uri] %> ;
184
- rdf:type <%= d[:cls_uri] %> .
209
+ <%= d[:class_uris].map{|c| "\trdf:type " + c}.join(" ;\n") + " ." %>
185
210
  <% end -%>
186
211
  <%# relations -%>
187
212
  <% relations.each do |r| -%>
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: tao_rdfizer
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.9.10
4
+ version: 0.10.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Jin-Dong Kim
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2019-07-11 00:00:00.000000000 Z
11
+ date: 2020-12-24 00:00:00.000000000 Z
12
12
  dependencies: []
13
13
  description: It uses TAO (text annotation ontology) for representation of annotations
14
14
  to text.
@@ -40,8 +40,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
40
40
  - !ruby/object:Gem::Version
41
41
  version: '0'
42
42
  requirements: []
43
- rubyforge_project:
44
- rubygems_version: 2.7.9
43
+ rubygems_version: 3.0.8
45
44
  signing_key:
46
45
  specification_version: 4
47
46
  summary: A RDF statement generator for annotations in the PubAnnotation JSON format.