tao_rdfizer 0.9.9 → 0.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (3)
  1. checksums.yaml +5 -5
  2. data/lib/tao_rdfizer/tao_rdfizer.rb +58 -28
  3. metadata +3 -4
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
- SHA1:
3
- metadata.gz: 0254770e5fbff5ef2071765b19a324af5ad1a3cd
4
- data.tar.gz: 639d52929cfe8123766b540cc4f2a92ccf616f36
2
+ SHA256:
3
+ metadata.gz: a2f897b03c9e6856aaf752fd645808c47e6d52422ea1b6934b44b694d8d0d9a1
4
+ data.tar.gz: 8d9fb426258bf3fceabb6471a81fcaeb99cd18dcace251575a0f2f715fa4d787
5
5
  SHA512:
6
- metadata.gz: c9feeec71d327a4781036d16d5d2773f1fd77678ef23ae2f01b627bdb41f2ab04bf401c57204fdd96c27d237c65c8a9ec9da3060fd2206c4d43d7ca1cad4da50
7
- data.tar.gz: 67cd10929f38deb58b0e988547f7bde80fb8dd88e3c771fc034274ef374233bfbe13a5d72c8fd26127a1e8cf5c18c34d158493e11c16d718310b5ca40f37651b
6
+ metadata.gz: f0d230f25945bf45aed05f8f0cb3d46113d5eb4213b17ce4812cbfbcfa5636d540343b8ee8fe94cf1bbbc1a1eb4db35ac076bf1d7cd3cb7abdd92db446c94f41
7
+ data.tar.gz: 6f00c1aeb735b8e76da2e9185ad141c0c29c1d45d94faad1b3d89ec903a1c931e22957f4f356dce24ff6beadd53a77ac7fcd5aad2c17cb281a5d0128cd062b89
data/lib/tao_rdfizer/tao_rdfizer.rb CHANGED
@@ -4,11 +4,11 @@ require 'erb'
4
4
  module TAO; end unless defined? TAO
5
5
 
6
6
  class TAO::RDFizer
7
- # if mode == :spans then produces span descriptions
8
- # if mode == :annotations then produces annotation descriptions
9
- # if mode == nil then produces both
10
- def initialize(mode = nil)
11
- @mode = mode
7
+ # if mode == :spans then produces span descriptions
8
+ # if mode == :annotations then produces annotation descriptions
9
+ # if mode == nil then produces both
10
+ def initialize(mode = nil)
11
+ @mode = mode
12
12
  template = if !mode.nil? && mode == :spans
13
13
  ERB_SPANS_TTL
14
14
  else
@@ -17,7 +17,7 @@ class TAO::RDFizer
17
17
 
18
18
  @tao_ttl_erb = ERB.new(template, nil, '-')
19
19
  @prefix_ttl_erb = ERB.new(ERB_PREFIXES_TTL, nil, '-')
20
- end
20
+ end
21
21
 
22
22
  def rdfize(annotations_col)
23
23
  # namespaces
@@ -27,6 +27,7 @@ class TAO::RDFizer
27
27
  anns[:namespaces].each {|n| namespaces[n[:prefix]] = n[:uri]} unless anns[:namespaces].nil?
28
28
 
29
29
  unless @mode ==:spans
30
+ raise ArgumentError, "A project name has to be specified." unless anns.has_key?(:project)
30
31
  prefix_for_this = anns[:project].downcase.gsub(/ /, '_')
31
32
  raise ArgumentError, "'#{prefix_for_this}' is a reserved prefix for this project." if namespaces.has_key?(prefix_for_this)
32
33
  project_uri = 'http://pubannotation.org/projects/' + anns[:project]
@@ -34,6 +35,7 @@ class TAO::RDFizer
34
35
  end
35
36
 
36
37
  denotations = []
38
+ attributes = []
37
39
  relations = []
38
40
  spans = []
39
41
 
@@ -46,30 +48,57 @@ class TAO::RDFizer
46
48
  end
47
49
 
48
50
  # denotations and relations
49
- _denotations = annotations[:denotations]
50
- _relations = annotations[:relations]
51
- _denotations = [] if _denotations.nil?
52
- _relations = [] if _relations.nil?
51
+ _denotations = annotations[:denotations] || []
52
+ _attributes = annotations[:attributes] || []
53
+ _relations = annotations[:relations] || []
53
54
  if @mode == :spans && annotations.has_key?(:tracks)
54
55
  annotations[:tracks].each do |track|
55
56
  _denotations += track[:denotations]
57
+ _attributes += track[:attributes]
56
58
  _relations += track[:relations]
57
59
  end
58
60
  end
59
61
 
60
- # denotations preprocessing
61
- _denotations.each do |d|
62
- span_uri = "<#{text_uri}/spans/#{d[:span][:begin]}-#{d[:span][:end]}>"
63
- d[:span_uri] = span_uri
64
- d[:obj_uri] = "#{prefix_for_this}:#{text_id}-#{d[:id]}"
65
- d[:cls_uri] = find_uri(d[:obj], namespaces, prefix_for_this)
66
- end
62
+ begin
63
+ if @mode == :annotations
64
+ # index attributes
65
+ attributesh = _attributes.inject({}) do |h, a|
66
+ if a[:pred].end_with?('_id')
67
+ subj = a[:subj]
68
+ h[subj] = [] unless h.has_key? subj
69
+ h[subj] << a[:obj]
70
+ end
71
+ h
72
+ end
67
73
 
68
- # relations preprocessing
69
- _relations.each do |r|
70
- r[:subj_uri] = "#{prefix_for_this}:#{text_id}-#{r[:subj]}"
71
- r[:obj_uri] = "#{prefix_for_this}:#{text_id}-#{r[:obj]}"
72
- r[:pred_uri] = find_uri(r[:pred], namespaces, prefix_for_this)
74
+ # denotations preprocessing
75
+ _denotations.each do |d|
76
+ span_uri = "<#{text_uri}/spans/#{d[:span][:begin]}-#{d[:span][:end]}>"
77
+ d[:span_uri] = span_uri
78
+ d[:obj_uri] = "#{prefix_for_this}:#{text_id}-#{d[:id]}"
79
+ class_uris = attributesh[d[:id]].push(d[:obj])
80
+ d[:class_uris] = class_uris.map{|uri| find_uri(uri, namespaces, prefix_for_this)}
81
+ rescue ArgumentError => e
82
+ raise ArgumentError, "[#{sourcedb}-#{sourceid}-#{d[:id]}] " + e.message
83
+ end
84
+
85
+ # relations preprocessing
86
+ _relations.each do |r|
87
+ r[:subj_uri] = "#{prefix_for_this}:#{text_id}-#{r[:subj]}"
88
+ r[:obj_uri] = "#{prefix_for_this}:#{text_id}-#{r[:obj]}"
89
+ r[:pred_uri] = find_uri(r[:pred], namespaces, prefix_for_this)
90
+ rescue ArgumentError => e
91
+ raise ArgumentError, "[#{sourcedb}-#{sourceid}-#{r[:id]}] " + e.message
92
+ end
93
+ else
94
+ # denotations preprocessing
95
+ _denotations.each do |d|
96
+ span_uri = "<#{text_uri}/spans/#{d[:span][:begin]}-#{d[:span][:end]}>"
97
+ d[:span_uri] = span_uri
98
+ rescue ArgumentError => e
99
+ raise ArgumentError, "[#{sourcedb}-#{sourceid}-#{d[:id]}] " + e.message
100
+ end
101
+ end
73
102
  end
74
103
 
75
104
  unless @mode == :annotations
@@ -84,7 +113,7 @@ class TAO::RDFizer
84
113
  s[:text] = text[s[:begin] ... s[:end]]
85
114
  end
86
115
 
87
- # index
116
+ # index spans
88
117
  spanh = _spans.inject({}){|r, s| r[s[:span_uri]] = s; r}
89
118
 
90
119
  # add denotation information
@@ -158,25 +187,26 @@ class TAO::RDFizer
158
187
  end
159
188
 
160
189
  def find_uri (label, namespaces, prefix_for_this)
190
+ raise ArgumentError, "A label including a whitespace character found: #{label}." if label.match(/\s/)
161
191
  delimiter_position = label.index(':')
162
192
  if !delimiter_position.nil? && namespaces.keys.include?(label[0...delimiter_position])
163
- label
193
+ label.gsub('(', '\(').gsub(')', '\)') # brackets have to be escaped
164
194
  elsif label =~ %r[^https?://]
165
195
  "<#{label}>"
166
196
  else
167
197
  clabel = if label.match(/^\W+$/)
168
198
  'SYM'
169
199
  else
170
- label.sub(/^\W+/, '').sub(/\W+$/, '').gsub(/ +/, '_')
200
+ label.sub(/^\W+/, '').sub(/[^a-zA-Z0-9_)]+$/, '').gsub(/ +/, '_')
171
201
  end
172
- namespaces.has_key?('_base') ? "<#{clabel}>" : "#{prefix_for_this}:#{clabel}"
202
+ namespaces.has_key?('_base') ? "<#{clabel}>" : "#{prefix_for_this}:#{clabel.gsub('(', '\(').gsub(')', '\)')}"
173
203
  end
174
204
  end
175
205
 
176
206
  ERB_ANNOTATIONS_TTL = <<~HEREDOC
177
207
  <% denotations.each do |d| -%>
178
208
  <%= d[:obj_uri] %> tao:denoted_by <%= d[:span_uri] %> ;
179
- rdf:type <%= d[:cls_uri] %> .
209
+ <%= d[:class_uris].map{|c| "\trdf:type " + c}.join(" ;\n") + " ." %>
180
210
  <% end -%>
181
211
  <%# relations -%>
182
212
  <% relations.each do |r| -%>
@@ -193,7 +223,7 @@ class TAO::RDFizer
193
223
  <% s[:children].each do |s| -%>
194
224
  tao:contains <%= s[:span_uri] %> ;
195
225
  <% end -%>
196
- tao:has_text <%= s[:text].dump %> ;
226
+ tao:has_text <%= s[:text].inspect %> ;
197
227
  tao:belongs_to <<%= s[:source_uri] %>> ;
198
228
  tao:begins_at <%= s[:begin] %> ;
199
229
  tao:ends_at <%= s[:end] %> .
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: tao_rdfizer
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.9.9
4
+ version: '0.10'
5
5
  platform: ruby
6
6
  authors:
7
7
  - Jin-Dong Kim
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2018-01-24 00:00:00.000000000 Z
11
+ date: 2020-12-20 00:00:00.000000000 Z
12
12
  dependencies: []
13
13
  description: It uses TAO (text annotation ontology) for representation of annotations
14
14
  to text.
@@ -40,8 +40,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
40
40
  - !ruby/object:Gem::Version
41
41
  version: '0'
42
42
  requirements: []
43
- rubyforge_project:
44
- rubygems_version: 2.6.11
43
+ rubygems_version: 3.0.8
45
44
  signing_key:
46
45
  specification_version: 4
47
46
  summary: A RDF statement generator for annotations in the PubAnnotation JSON format.