tao_rdfizer 0.11.3 → 0.11.4

Sign up to get free protection for your applications and to get access to all the features.
Files changed (3) hide show
  1. checksums.yaml +4 -4
  2. data/lib/tao_rdfizer/tao_rdfizer.rb +43 -55
  3. metadata +4 -4
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 122108e353debbc7d153a9aaa4f4a7b1d0d964ab1f24697443d2e65977e61efa
4
- data.tar.gz: 2a62939c0ac1bf15e1225a1512d3d797c379ddb563bfeb422996c752b1865f61
3
+ metadata.gz: 59d7999a921ef33104728a1f86edbeb7f20161641b37a53435daf316772ffed8
4
+ data.tar.gz: d6cd66e38eececa7b7cbb251686d148a4924da16ad751eea3af2cef21b00121a
5
5
  SHA512:
6
- metadata.gz: 6b25ad6199e58d03b7e8acdf4c60a8a8c9c87b634940edddebb73b696a5e4bb37b96b7d74cb818acb93b951ea5977ae72ec8273c3ec264d3c73d6e381efe1092
7
- data.tar.gz: 5085e04e204ef0eb8f1282e0f659a2cc60eae7ef70283c658072b310683f016cf2c1806ff71cad66311950b246eb99a402e502d799f998acd34434ea9d3bc4ff
6
+ metadata.gz: 4e98ac931a0f2913f44e8e234222f55c93ff9bbd76173787393fcf1e866a14b7707168c8446f33cfd00f208599439cacdd67b4b6296c9ab8a1288a3963fd4f0c
7
+ data.tar.gz: 788aaf5c427d44783edadfc30f84a8d2cc7aac99641575dd3999f780520c374cf5c443fc9a5dd656424a4291940861fa268a37b6ca4f3dab9a2ae196f6df2bc8
@@ -14,14 +14,14 @@ class TAO::RDFizer
14
14
  else
15
15
  ERB_ANNOTATIONS_TTL
16
16
  end
17
- @tao_ttl_erb = ERB.new(template, nil, '-')
18
- @prefix_ttl_erb = ERB.new(ERB_PREFIXES_TTL, nil, '-')
17
+ @tao_ttl_erb = ERB.new(template, trim_mode: '-')
18
+ @prefix_ttl_erb = ERB.new(ERB_PREFIXES_TTL, trim_mode: '-')
19
19
  end
20
20
 
21
21
  def rdfize(annotations_col, options = nil)
22
22
  options ||= {}
23
- only_prefixes = options.has_key?(:only_prefixes) ? options[:only_prefixes] == true : false
24
- with_prefixes = options.has_key?(:with_prefixes) ? options[:with_prefixes] == true : true
23
+ only_prefixes = options[:only_prefixes] == true
24
+ with_prefixes = options[:with_prefixes] != true
25
25
 
26
26
  # check the format
27
27
  annotations_col.each do |annotations|
@@ -29,35 +29,33 @@ class TAO::RDFizer
29
29
  end
30
30
 
31
31
  # namespaces
32
- namespaces = {}
33
-
34
- anns = annotations_col.first
35
- anns[:namespaces].each {|n| namespaces[n[:prefix]] = n[:uri]} unless anns[:namespaces].nil?
32
+ namespaces = get_namespaces(annotations_col.first)
36
33
 
37
34
  prefixes_ttl = @prefix_ttl_erb.result_with_hash(namespaces:namespaces) if only_prefixes || with_prefixes
38
35
 
39
- if only_prefixes
40
- prefixes_ttl
41
- else
42
- annotations_ttl = get_annotations_ttl(annotations_col, namespaces)
43
- if with_prefixes
44
- prefixes_ttl + annotations_ttl
45
- else
46
- annotations_ttl
47
- end
48
- end
36
+ return prefixes_ttl if only_prefixes
37
+
38
+ annotations_ttl = get_annotations_ttl(annotations_col, namespaces)
39
+ with_prefixes ? prefixes_ttl + annotations_ttl : annotations_ttl
49
40
  end
50
41
 
51
42
  private
52
43
 
53
- def get_annotations_ttl(annotations_col, namespaces)
54
- anns = annotations_col.first
44
+ def get_namespaces(annotations)
45
+ return {} if annotations[:namespaces].nil?
46
+
47
+ annotations[:namespaces].each_with_object({}) do |n, namespaces|
48
+ namespaces[n[:prefix]] = n[:uri]
49
+ end
50
+ end
55
51
 
52
+ def get_annotations_ttl(annotations_col, namespaces)
56
53
  unless @mode ==:spans
54
+ anns = annotations_col.first
57
55
  raise ArgumentError, "A project name has to be specified." unless anns.has_key?(:project)
58
56
  prefix_for_this = anns[:project].downcase.gsub(/ /, '_')
59
57
  raise ArgumentError, "'#{prefix_for_this}' is a reserved prefix for this project." if namespaces.has_key?(prefix_for_this)
60
- project_uri = 'http://pubannotation.org/projects/' + anns[:project]
58
+ project_uri = 'https://pubannotation.org/projects/' + anns[:project]
61
59
  namespaces[prefix_for_this] = project_uri + '/'
62
60
  end
63
61
 
@@ -70,8 +68,8 @@ class TAO::RDFizer
70
68
  text = annotations[:text]
71
69
  text_uri = annotations[:target]
72
70
  text_id = begin
73
- sourcedb, sourceid, divid = get_target_info(text_uri)
74
- divid.nil? ? "#{sourcedb}-#{sourceid}" : "#{sourcedb}-#{sourceid}-#{divid}"
71
+ sourcedb, sourceid = get_target_info(text_uri)
72
+ "#{sourcedb}-#{sourceid}"
75
73
  end
76
74
 
77
75
  # denotations and relations
@@ -153,37 +151,29 @@ class TAO::RDFizer
153
151
  end
154
152
  end
155
153
 
156
- _spans.sort!{|a, b| (a[:begin] <=> b[:begin]).nonzero? || b[:end] <=> a[:end]}
154
+ _spans.sort_by! { |s| [s[:begin], -s[:end]] }
157
155
 
158
156
  ## begin indexing
159
157
  len = text.length
160
158
  num = _spans.length
161
159
 
162
- # initilaize the index
163
- (0 ... num).each do |i|
164
- _spans[i][:followings] = []
165
- _spans[i][:children] = []
166
- end
160
+ # create the indexes for followings and children
161
+ _spans.each_with_index do |span, i|
162
+ span[:followings] = []
163
+ span[:children] = []
167
164
 
168
- (0 ... num).each do |i|
169
- # find the first following span
170
165
  j = i + 1
171
-
172
- while j < num && _spans[j][:begin] < _spans[i][:end]
173
- unless include_parent?(_spans[i][:children], _spans[j])
174
- _spans[i][:children] << _spans[j]
175
- end
166
+ while j < num && _spans[j][:begin] < span[:end]
167
+ span[:children] << _spans[j] unless include_parent?(span[:children], _spans[j])
176
168
  j += 1
177
169
  end
178
170
 
179
- # find adjacent positions
180
- fp = _spans[i][:end]
171
+ fp = span[:end]
181
172
  fp += 1 while fp < len && text[fp].match(/\s/)
182
173
  next if fp == len
183
174
 
184
- # index adjacent spans
185
175
  while j < num && _spans[j][:begin] == fp
186
- _spans[i][:followings] << _spans[j]
176
+ span[:followings] << _spans[j]
187
177
  j += 1
188
178
  end
189
179
  end
@@ -206,28 +196,26 @@ class TAO::RDFizer
206
196
  def get_target_info (text_uri)
207
197
  sourcedb = (text_uri =~ %r|/sourcedb/([^/]+)|)? $1 : nil
208
198
  sourceid = (text_uri =~ %r|/sourceid/([^/]+)|)? $1 : nil
209
- divid = (text_uri =~ %r|/divs/([^/]+)|)? $1 : nil
210
199
 
211
- return sourcedb, sourceid, divid
200
+ return sourcedb, sourceid
212
201
  end
213
202
 
214
- def find_uri (label, namespaces, prefix_for_this)
203
+ def find_uri(label, namespaces, prefix_for_this)
204
+ @uri_cache ||= {}
205
+
206
+ return @uri_cache[label] if @uri_cache.key?(label)
207
+
215
208
  if label.match(/\s/)
216
- # raise ArgumentError, "A label including a whitespace character found: #{label}."
217
- label.gsub(/\s/, '_')
209
+ label.gsub!(/\s/, '_')
218
210
  end
219
211
  delimiter_position = label.index(':')
220
- if !delimiter_position.nil? && namespaces.keys.include?(label[0...delimiter_position])
221
- label.gsub('(', '\(').gsub(')', '\)') # brackets have to be escaped
212
+ if delimiter_position && namespaces.key?(label[0...delimiter_position])
213
+ @uri_cache[label] = label.gsub('(', '\(').gsub(')', '\)')
222
214
  elsif label =~ %r[^https?://]
223
- "<#{label}>"
215
+ @uri_cache[label] = "<#{label}>"
224
216
  else
225
- clabel = if label.match(/^\W+$/)
226
- 'SYM'
227
- else
228
- label.sub(/^\W+/, '').sub(/[^a-zA-Z0-9_)]+$/, '').gsub(/ +/, '_')
229
- end
230
- namespaces.has_key?('_base') ? "<#{clabel}>" : "#{prefix_for_this}:#{clabel.gsub('(', '\(').gsub(')', '\)')}"
217
+ clabel = label.match(/^\W+$/) ? 'SYM' : label.sub(/^\W+/, '').sub(/[^a-zA-Z0-9_)]+$/, '').gsub(/ +/, '_')
218
+ @uri_cache[label] = namespaces.key?('_base') ? "<#{clabel}>" : "#{prefix_for_this}:#{clabel.gsub('(', '\(').gsub(')', '\)')}"
231
219
  end
232
220
  end
233
221
 
@@ -274,7 +262,7 @@ class TAO::RDFizer
274
262
  # variable: namespaces
275
263
  ERB_PREFIXES_TTL = <<~HEREDOC
276
264
  @prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
277
- @prefix tao: <http://pubannotation.org/ontology/tao.owl#> .
265
+ @prefix tao: <https://pubannotation.org/ontology/tao.owl#> .
278
266
  <%# namespaces -%>
279
267
  <% namespaces.each_key do |p| -%>
280
268
  <% if p == '_base' -%>
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: tao_rdfizer
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.11.3
4
+ version: 0.11.4
5
5
  platform: ruby
6
6
  authors:
7
7
  - Jin-Dong Kim
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2021-05-23 00:00:00.000000000 Z
11
+ date: 2024-09-18 00:00:00.000000000 Z
12
12
  dependencies: []
13
13
  description: It uses TAO (text annotation ontology) for representation of annotations
14
14
  to text.
@@ -33,14 +33,14 @@ required_ruby_version: !ruby/object:Gem::Requirement
33
33
  requirements:
34
34
  - - ">="
35
35
  - !ruby/object:Gem::Version
36
- version: '0'
36
+ version: 3.0.0
37
37
  required_rubygems_version: !ruby/object:Gem::Requirement
38
38
  requirements:
39
39
  - - ">="
40
40
  - !ruby/object:Gem::Version
41
41
  version: '0'
42
42
  requirements: []
43
- rubygems_version: 3.0.9
43
+ rubygems_version: 3.5.11
44
44
  signing_key:
45
45
  specification_version: 4
46
46
  summary: A RDF statement generator for annotations in the PubAnnotation JSON format.