tao_rdfizer 0.9.9 → 0.10

Sign up to get free protection for your applications and to get access to all the features.
Files changed (3) hide show
  1. checksums.yaml +5 -5
  2. data/lib/tao_rdfizer/tao_rdfizer.rb +58 -28
  3. metadata +3 -4
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
- SHA1:
3
- metadata.gz: 0254770e5fbff5ef2071765b19a324af5ad1a3cd
4
- data.tar.gz: 639d52929cfe8123766b540cc4f2a92ccf616f36
2
+ SHA256:
3
+ metadata.gz: a2f897b03c9e6856aaf752fd645808c47e6d52422ea1b6934b44b694d8d0d9a1
4
+ data.tar.gz: 8d9fb426258bf3fceabb6471a81fcaeb99cd18dcace251575a0f2f715fa4d787
5
5
  SHA512:
6
- metadata.gz: c9feeec71d327a4781036d16d5d2773f1fd77678ef23ae2f01b627bdb41f2ab04bf401c57204fdd96c27d237c65c8a9ec9da3060fd2206c4d43d7ca1cad4da50
7
- data.tar.gz: 67cd10929f38deb58b0e988547f7bde80fb8dd88e3c771fc034274ef374233bfbe13a5d72c8fd26127a1e8cf5c18c34d158493e11c16d718310b5ca40f37651b
6
+ metadata.gz: f0d230f25945bf45aed05f8f0cb3d46113d5eb4213b17ce4812cbfbcfa5636d540343b8ee8fe94cf1bbbc1a1eb4db35ac076bf1d7cd3cb7abdd92db446c94f41
7
+ data.tar.gz: 6f00c1aeb735b8e76da2e9185ad141c0c29c1d45d94faad1b3d89ec903a1c931e22957f4f356dce24ff6beadd53a77ac7fcd5aad2c17cb281a5d0128cd062b89
@@ -4,11 +4,11 @@ require 'erb'
4
4
  module TAO; end unless defined? TAO
5
5
 
6
6
  class TAO::RDFizer
7
- # if mode == :spans then produces span descriptions
8
- # if mode == :annotations then produces annotation descriptions
9
- # if mode == nil then produces both
10
- def initialize(mode = nil)
11
- @mode = mode
7
+ # if mode == :spans then produces span descriptions
8
+ # if mode == :annotations then produces annotation descriptions
9
+ # if mode == nil then produces both
10
+ def initialize(mode = nil)
11
+ @mode = mode
12
12
  template = if !mode.nil? && mode == :spans
13
13
  ERB_SPANS_TTL
14
14
  else
@@ -17,7 +17,7 @@ class TAO::RDFizer
17
17
 
18
18
  @tao_ttl_erb = ERB.new(template, nil, '-')
19
19
  @prefix_ttl_erb = ERB.new(ERB_PREFIXES_TTL, nil, '-')
20
- end
20
+ end
21
21
 
22
22
  def rdfize(annotations_col)
23
23
  # namespaces
@@ -27,6 +27,7 @@ class TAO::RDFizer
27
27
  anns[:namespaces].each {|n| namespaces[n[:prefix]] = n[:uri]} unless anns[:namespaces].nil?
28
28
 
29
29
  unless @mode ==:spans
30
+ raise ArgumentError, "A project name has to be specified." unless anns.has_key?(:project)
30
31
  prefix_for_this = anns[:project].downcase.gsub(/ /, '_')
31
32
  raise ArgumentError, "'#{prefix_for_this}' is a reserved prefix for this project." if namespaces.has_key?(prefix_for_this)
32
33
  project_uri = 'http://pubannotation.org/projects/' + anns[:project]
@@ -34,6 +35,7 @@ class TAO::RDFizer
34
35
  end
35
36
 
36
37
  denotations = []
38
+ attributes = []
37
39
  relations = []
38
40
  spans = []
39
41
 
@@ -46,30 +48,57 @@ class TAO::RDFizer
46
48
  end
47
49
 
48
50
  # denotations and relations
49
- _denotations = annotations[:denotations]
50
- _relations = annotations[:relations]
51
- _denotations = [] if _denotations.nil?
52
- _relations = [] if _relations.nil?
51
+ _denotations = annotations[:denotations] || []
52
+ _attributes = annotations[:attributes] || []
53
+ _relations = annotations[:relations] || []
53
54
  if @mode == :spans && annotations.has_key?(:tracks)
54
55
  annotations[:tracks].each do |track|
55
56
  _denotations += track[:denotations]
57
+ _attributes += track[:attributes]
56
58
  _relations += track[:relations]
57
59
  end
58
60
  end
59
61
 
60
- # denotations preprocessing
61
- _denotations.each do |d|
62
- span_uri = "<#{text_uri}/spans/#{d[:span][:begin]}-#{d[:span][:end]}>"
63
- d[:span_uri] = span_uri
64
- d[:obj_uri] = "#{prefix_for_this}:#{text_id}-#{d[:id]}"
65
- d[:cls_uri] = find_uri(d[:obj], namespaces, prefix_for_this)
66
- end
62
+ begin
63
+ if @mode == :annotations
64
+ # index attributes
65
+ attributesh = _attributes.inject({}) do |h, a|
66
+ if a[:pred].end_with?('_id')
67
+ subj = a[:subj]
68
+ h[subj] = [] unless h.has_key? subj
69
+ h[subj] << a[:obj]
70
+ end
71
+ h
72
+ end
67
73
 
68
- # relations preprocessing
69
- _relations.each do |r|
70
- r[:subj_uri] = "#{prefix_for_this}:#{text_id}-#{r[:subj]}"
71
- r[:obj_uri] = "#{prefix_for_this}:#{text_id}-#{r[:obj]}"
72
- r[:pred_uri] = find_uri(r[:pred], namespaces, prefix_for_this)
74
+ # denotations preprocessing
75
+ _denotations.each do |d|
76
+ span_uri = "<#{text_uri}/spans/#{d[:span][:begin]}-#{d[:span][:end]}>"
77
+ d[:span_uri] = span_uri
78
+ d[:obj_uri] = "#{prefix_for_this}:#{text_id}-#{d[:id]}"
79
+ class_uris = attributesh[d[:id]].push(d[:obj])
80
+ d[:class_uris] = class_uris.map{|uri| find_uri(uri, namespaces, prefix_for_this)}
81
+ rescue ArgumentError => e
82
+ raise ArgumentError, "[#{sourcedb}-#{sourceid}-#{d[:id]}] " + e.message
83
+ end
84
+
85
+ # relations preprocessing
86
+ _relations.each do |r|
87
+ r[:subj_uri] = "#{prefix_for_this}:#{text_id}-#{r[:subj]}"
88
+ r[:obj_uri] = "#{prefix_for_this}:#{text_id}-#{r[:obj]}"
89
+ r[:pred_uri] = find_uri(r[:pred], namespaces, prefix_for_this)
90
+ rescue ArgumentError => e
91
+ raise ArgumentError, "[#{sourcedb}-#{sourceid}-#{r[:id]}] " + e.message
92
+ end
93
+ else
94
+ # denotations preprocessing
95
+ _denotations.each do |d|
96
+ span_uri = "<#{text_uri}/spans/#{d[:span][:begin]}-#{d[:span][:end]}>"
97
+ d[:span_uri] = span_uri
98
+ rescue ArgumentError => e
99
+ raise ArgumentError, "[#{sourcedb}-#{sourceid}-#{d[:id]}] " + e.message
100
+ end
101
+ end
73
102
  end
74
103
 
75
104
  unless @mode == :annotations
@@ -84,7 +113,7 @@ class TAO::RDFizer
84
113
  s[:text] = text[s[:begin] ... s[:end]]
85
114
  end
86
115
 
87
- # index
116
+ # index spans
88
117
  spanh = _spans.inject({}){|r, s| r[s[:span_uri]] = s; r}
89
118
 
90
119
  # add denotation information
@@ -158,25 +187,26 @@ class TAO::RDFizer
158
187
  end
159
188
 
160
189
  def find_uri (label, namespaces, prefix_for_this)
190
+ raise ArgumentError, "A label including a whitespace character found: #{label}." if label.match(/\s/)
161
191
  delimiter_position = label.index(':')
162
192
  if !delimiter_position.nil? && namespaces.keys.include?(label[0...delimiter_position])
163
- label
193
+ label.gsub('(', '\(').gsub(')', '\)') # brackets have to be escaped
164
194
  elsif label =~ %r[^https?://]
165
195
  "<#{label}>"
166
196
  else
167
197
  clabel = if label.match(/^\W+$/)
168
198
  'SYM'
169
199
  else
170
- label.sub(/^\W+/, '').sub(/\W+$/, '').gsub(/ +/, '_')
200
+ label.sub(/^\W+/, '').sub(/[^a-zA-Z0-9_)]+$/, '').gsub(/ +/, '_')
171
201
  end
172
- namespaces.has_key?('_base') ? "<#{clabel}>" : "#{prefix_for_this}:#{clabel}"
202
+ namespaces.has_key?('_base') ? "<#{clabel}>" : "#{prefix_for_this}:#{clabel.gsub('(', '\(').gsub(')', '\)')}"
173
203
  end
174
204
  end
175
205
 
176
206
  ERB_ANNOTATIONS_TTL = <<~HEREDOC
177
207
  <% denotations.each do |d| -%>
178
208
  <%= d[:obj_uri] %> tao:denoted_by <%= d[:span_uri] %> ;
179
- rdf:type <%= d[:cls_uri] %> .
209
+ <%= d[:class_uris].map{|c| "\trdf:type " + c}.join(" ;\n") + " ." %>
180
210
  <% end -%>
181
211
  <%# relations -%>
182
212
  <% relations.each do |r| -%>
@@ -193,7 +223,7 @@ class TAO::RDFizer
193
223
  <% s[:children].each do |s| -%>
194
224
  tao:contains <%= s[:span_uri] %> ;
195
225
  <% end -%>
196
- tao:has_text <%= s[:text].dump %> ;
226
+ tao:has_text <%= s[:text].inspect %> ;
197
227
  tao:belongs_to <<%= s[:source_uri] %>> ;
198
228
  tao:begins_at <%= s[:begin] %> ;
199
229
  tao:ends_at <%= s[:end] %> .
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: tao_rdfizer
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.9.9
4
+ version: '0.10'
5
5
  platform: ruby
6
6
  authors:
7
7
  - Jin-Dong Kim
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2018-01-24 00:00:00.000000000 Z
11
+ date: 2020-12-20 00:00:00.000000000 Z
12
12
  dependencies: []
13
13
  description: It uses TAO (text annotation ontology) for representation of annotations
14
14
  to text.
@@ -40,8 +40,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
40
40
  - !ruby/object:Gem::Version
41
41
  version: '0'
42
42
  requirements: []
43
- rubyforge_project:
44
- rubygems_version: 2.6.11
43
+ rubygems_version: 3.0.8
45
44
  signing_key:
46
45
  specification_version: 4
47
46
  summary: A RDF statement generator for annotations in the PubAnnotation JSON format.