tao_rdfizer 0.9.10 → 0.10.1

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the differences between package versions as they appear in their respective public registries.
Files changed (3) hide show
  1. checksums.yaml +4 -4
  2. data/lib/tao_rdfizer/tao_rdfizer.rb +48 -23
  3. metadata +3 -4
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 839f30915cc8896477342a8cfdb87d8d4f23d8ffbbc133b9b0b087939050befe
4
- data.tar.gz: bfa3efcbae03859f8a778afbf81f37daffec3467b537671cf5282c0905ed1d5f
3
+ metadata.gz: 4baf53459b357e899ab4e569f15ba00e4240653641912488c2478ef0a65ac2bf
4
+ data.tar.gz: cd7d1a7121abbd4f725b1cbf5dff8cf28db27efdcab025e1c43305d0350157ab
5
5
  SHA512:
6
- metadata.gz: 2a9744b372d7a9774774262a17771c43533b44c3a95ad19cd7e60add25d6135afb3c85ecad8d6933ffdff61b39632be0386ecf9d85340f1f0bea1efc9176e8d5
7
- data.tar.gz: 77f1d13bf567b6e5ed28668034da90e0810520358e2aa2d27dc747ce78f090316b1519be8f1b2a30050fce5a20408b46a932887c955979a217304c4a53cd709e
6
+ metadata.gz: ba755e5d41a06b36d592dc52261ed65b9018c47ed4da5b29fa0bf2a49196e0ae29c046c1cf463be9bbaad510ddca2bed464ce17898c58480e3258668374a7675
7
+ data.tar.gz: cb71ac538c0573812317caa83a54b3069dca0221d85388dd2ede2e1251b4d820379e43ac45744dbc450faf4a3dcd0db542832075a4febdc03389d956a7cbd9ff
data/lib/tao_rdfizer/tao_rdfizer.rb CHANGED
@@ -27,6 +27,7 @@ class TAO::RDFizer
27
27
  anns[:namespaces].each {|n| namespaces[n[:prefix]] = n[:uri]} unless anns[:namespaces].nil?
28
28
 
29
29
  unless @mode ==:spans
30
+ raise ArgumentError, "A project name has to be specified." unless anns.has_key?(:project)
30
31
  prefix_for_this = anns[:project].downcase.gsub(/ /, '_')
31
32
  raise ArgumentError, "'#{prefix_for_this}' is a reserved prefix for this project." if namespaces.has_key?(prefix_for_this)
32
33
  project_uri = 'http://pubannotation.org/projects/' + anns[:project]
@@ -34,6 +35,7 @@ class TAO::RDFizer
34
35
  end
35
36
 
36
37
  denotations = []
38
+ attributes = []
37
39
  relations = []
38
40
  spans = []
39
41
 
@@ -46,34 +48,57 @@ class TAO::RDFizer
46
48
  end
47
49
 
48
50
  # denotations and relations
49
- _denotations = annotations[:denotations]
50
- _relations = annotations[:relations]
51
- _denotations = [] if _denotations.nil?
52
- _relations = [] if _relations.nil?
51
+ _denotations = annotations[:denotations] || []
52
+ _attributes = annotations[:attributes] || []
53
+ _relations = annotations[:relations] || []
53
54
  if @mode == :spans && annotations.has_key?(:tracks)
54
55
  annotations[:tracks].each do |track|
55
56
  _denotations += track[:denotations]
57
+ _attributes += track[:attributes]
56
58
  _relations += track[:relations]
57
59
  end
58
60
  end
59
61
 
60
62
  begin
61
- # denotations preprocessing
62
- _denotations.each do |d|
63
- span_uri = "<#{text_uri}/spans/#{d[:span][:begin]}-#{d[:span][:end]}>"
64
- d[:span_uri] = span_uri
65
- d[:obj_uri] = "#{prefix_for_this}:#{text_id}-#{d[:id]}"
66
- d[:cls_uri] = find_uri(d[:obj], namespaces, prefix_for_this)
67
- end
63
+ if @mode == :annotations
64
+ # index attributes
65
+ attributesh = _attributes.inject({}) do |h, a|
66
+ if a[:pred].end_with?('_id')
67
+ subj = a[:subj]
68
+ h[subj] = [] unless h.has_key? subj
69
+ h[subj] << a[:obj]
70
+ end
71
+ h
72
+ end
68
73
 
69
- # relations preprocessing
70
- _relations.each do |r|
71
- r[:subj_uri] = "#{prefix_for_this}:#{text_id}-#{r[:subj]}"
72
- r[:obj_uri] = "#{prefix_for_this}:#{text_id}-#{r[:obj]}"
73
- r[:pred_uri] = find_uri(r[:pred], namespaces, prefix_for_this)
74
+ # denotations preprocessing
75
+ _denotations.each do |d|
76
+ span_uri = "<#{text_uri}/spans/#{d[:span][:begin]}-#{d[:span][:end]}>"
77
+ d[:span_uri] = span_uri
78
+ d[:obj_uri] = "#{prefix_for_this}:#{text_id}-#{d[:id]}"
79
+ class_uris = (attributesh[d[:id]] || []).push(d[:obj])
80
+ d[:class_uris] = class_uris.map{|uri| find_uri(uri, namespaces, prefix_for_this)}
81
+ rescue ArgumentError => e
82
+ raise ArgumentError, "[#{sourcedb}-#{sourceid}-#{d[:id]}] " + e.message
83
+ end
84
+
85
+ # relations preprocessing
86
+ _relations.each do |r|
87
+ r[:subj_uri] = "#{prefix_for_this}:#{text_id}-#{r[:subj]}"
88
+ r[:obj_uri] = "#{prefix_for_this}:#{text_id}-#{r[:obj]}"
89
+ r[:pred_uri] = find_uri(r[:pred], namespaces, prefix_for_this)
90
+ rescue ArgumentError => e
91
+ raise ArgumentError, "[#{sourcedb}-#{sourceid}-#{r[:id]}] " + e.message
92
+ end
93
+ else
94
+ # denotations preprocessing
95
+ _denotations.each do |d|
96
+ span_uri = "<#{text_uri}/spans/#{d[:span][:begin]}-#{d[:span][:end]}>"
97
+ d[:span_uri] = span_uri
98
+ rescue ArgumentError => e
99
+ raise ArgumentError, "[#{sourcedb}-#{sourceid}-#{d[:id]}] " + e.message
100
+ end
74
101
  end
75
- rescue ArgumentError => e
76
- raise ArgumentError, "[#{sourcedb}-#{sourceid}] " + e
77
102
  end
78
103
 
79
104
  unless @mode == :annotations
@@ -88,7 +113,7 @@ class TAO::RDFizer
88
113
  s[:text] = text[s[:begin] ... s[:end]]
89
114
  end
90
115
 
91
- # index
116
+ # index spans
92
117
  spanh = _spans.inject({}){|r, s| r[s[:span_uri]] = s; r}
93
118
 
94
119
  # add denotation information
@@ -165,23 +190,23 @@ class TAO::RDFizer
165
190
  raise ArgumentError, "A label including a whitespace character found: #{label}." if label.match(/\s/)
166
191
  delimiter_position = label.index(':')
167
192
  if !delimiter_position.nil? && namespaces.keys.include?(label[0...delimiter_position])
168
- label
193
+ label.gsub('(', '\(').gsub(')', '\)') # brackets have to be escaped
169
194
  elsif label =~ %r[^https?://]
170
195
  "<#{label}>"
171
196
  else
172
197
  clabel = if label.match(/^\W+$/)
173
198
  'SYM'
174
199
  else
175
- label.sub(/^\W+/, '').sub(/\W+$/, '').gsub(/ +/, '_')
200
+ label.sub(/^\W+/, '').sub(/[^a-zA-Z0-9_)]+$/, '').gsub(/ +/, '_')
176
201
  end
177
- namespaces.has_key?('_base') ? "<#{clabel}>" : "#{prefix_for_this}:#{clabel}"
202
+ namespaces.has_key?('_base') ? "<#{clabel}>" : "#{prefix_for_this}:#{clabel.gsub('(', '\(').gsub(')', '\)')}"
178
203
  end
179
204
  end
180
205
 
181
206
  ERB_ANNOTATIONS_TTL = <<~HEREDOC
182
207
  <% denotations.each do |d| -%>
183
208
  <%= d[:obj_uri] %> tao:denoted_by <%= d[:span_uri] %> ;
184
- rdf:type <%= d[:cls_uri] %> .
209
+ <%= d[:class_uris].map{|c| "\trdf:type " + c}.join(" ;\n") + " ." %>
185
210
  <% end -%>
186
211
  <%# relations -%>
187
212
  <% relations.each do |r| -%>
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: tao_rdfizer
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.9.10
4
+ version: 0.10.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Jin-Dong Kim
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2019-07-11 00:00:00.000000000 Z
11
+ date: 2020-12-24 00:00:00.000000000 Z
12
12
  dependencies: []
13
13
  description: It uses TAO (text annotation ontology) for representation of annotations
14
14
  to text.
@@ -40,8 +40,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
40
40
  - !ruby/object:Gem::Version
41
41
  version: '0'
42
42
  requirements: []
43
- rubyforge_project:
44
- rubygems_version: 2.7.9
43
+ rubygems_version: 3.0.8
45
44
  signing_key:
46
45
  specification_version: 4
47
46
  summary: A RDF statement generator for annotations in the PubAnnotation JSON format.