tao_rdfizer 0.11.3 → 0.11.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (3)
  1. checksums.yaml +4 -4
  2. data/lib/tao_rdfizer/tao_rdfizer.rb +43 -55
  3. metadata +4 -4
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 122108e353debbc7d153a9aaa4f4a7b1d0d964ab1f24697443d2e65977e61efa
4
- data.tar.gz: 2a62939c0ac1bf15e1225a1512d3d797c379ddb563bfeb422996c752b1865f61
3
+ metadata.gz: 59d7999a921ef33104728a1f86edbeb7f20161641b37a53435daf316772ffed8
4
+ data.tar.gz: d6cd66e38eececa7b7cbb251686d148a4924da16ad751eea3af2cef21b00121a
5
5
  SHA512:
6
- metadata.gz: 6b25ad6199e58d03b7e8acdf4c60a8a8c9c87b634940edddebb73b696a5e4bb37b96b7d74cb818acb93b951ea5977ae72ec8273c3ec264d3c73d6e381efe1092
7
- data.tar.gz: 5085e04e204ef0eb8f1282e0f659a2cc60eae7ef70283c658072b310683f016cf2c1806ff71cad66311950b246eb99a402e502d799f998acd34434ea9d3bc4ff
6
+ metadata.gz: 4e98ac931a0f2913f44e8e234222f55c93ff9bbd76173787393fcf1e866a14b7707168c8446f33cfd00f208599439cacdd67b4b6296c9ab8a1288a3963fd4f0c
7
+ data.tar.gz: 788aaf5c427d44783edadfc30f84a8d2cc7aac99641575dd3999f780520c374cf5c443fc9a5dd656424a4291940861fa268a37b6ca4f3dab9a2ae196f6df2bc8
@@ -14,14 +14,14 @@ class TAO::RDFizer
14
14
  else
15
15
  ERB_ANNOTATIONS_TTL
16
16
  end
17
- @tao_ttl_erb = ERB.new(template, nil, '-')
18
- @prefix_ttl_erb = ERB.new(ERB_PREFIXES_TTL, nil, '-')
17
+ @tao_ttl_erb = ERB.new(template, trim_mode: '-')
18
+ @prefix_ttl_erb = ERB.new(ERB_PREFIXES_TTL, trim_mode: '-')
19
19
  end
20
20
 
21
21
  def rdfize(annotations_col, options = nil)
22
22
  options ||= {}
23
- only_prefixes = options.has_key?(:only_prefixes) ? options[:only_prefixes] == true : false
24
- with_prefixes = options.has_key?(:with_prefixes) ? options[:with_prefixes] == true : true
23
+ only_prefixes = options[:only_prefixes] == true
24
+ with_prefixes = options[:with_prefixes] != true
25
25
 
26
26
  # check the format
27
27
  annotations_col.each do |annotations|
@@ -29,35 +29,33 @@ class TAO::RDFizer
29
29
  end
30
30
 
31
31
  # namespaces
32
- namespaces = {}
33
-
34
- anns = annotations_col.first
35
- anns[:namespaces].each {|n| namespaces[n[:prefix]] = n[:uri]} unless anns[:namespaces].nil?
32
+ namespaces = get_namespaces(annotations_col.first)
36
33
 
37
34
  prefixes_ttl = @prefix_ttl_erb.result_with_hash(namespaces:namespaces) if only_prefixes || with_prefixes
38
35
 
39
- if only_prefixes
40
- prefixes_ttl
41
- else
42
- annotations_ttl = get_annotations_ttl(annotations_col, namespaces)
43
- if with_prefixes
44
- prefixes_ttl + annotations_ttl
45
- else
46
- annotations_ttl
47
- end
48
- end
36
+ return prefixes_ttl if only_prefixes
37
+
38
+ annotations_ttl = get_annotations_ttl(annotations_col, namespaces)
39
+ with_prefixes ? prefixes_ttl + annotations_ttl : annotations_ttl
49
40
  end
50
41
 
51
42
  private
52
43
 
53
- def get_annotations_ttl(annotations_col, namespaces)
54
- anns = annotations_col.first
44
+ def get_namespaces(annotations)
45
+ return {} if annotations[:namespaces].nil?
46
+
47
+ annotations[:namespaces].each_with_object({}) do |n, namespaces|
48
+ namespaces[n[:prefix]] = n[:uri]
49
+ end
50
+ end
55
51
 
52
+ def get_annotations_ttl(annotations_col, namespaces)
56
53
  unless @mode ==:spans
54
+ anns = annotations_col.first
57
55
  raise ArgumentError, "A project name has to be specified." unless anns.has_key?(:project)
58
56
  prefix_for_this = anns[:project].downcase.gsub(/ /, '_')
59
57
  raise ArgumentError, "'#{prefix_for_this}' is a reserved prefix for this project." if namespaces.has_key?(prefix_for_this)
60
- project_uri = 'http://pubannotation.org/projects/' + anns[:project]
58
+ project_uri = 'https://pubannotation.org/projects/' + anns[:project]
61
59
  namespaces[prefix_for_this] = project_uri + '/'
62
60
  end
63
61
 
@@ -70,8 +68,8 @@ class TAO::RDFizer
70
68
  text = annotations[:text]
71
69
  text_uri = annotations[:target]
72
70
  text_id = begin
73
- sourcedb, sourceid, divid = get_target_info(text_uri)
74
- divid.nil? ? "#{sourcedb}-#{sourceid}" : "#{sourcedb}-#{sourceid}-#{divid}"
71
+ sourcedb, sourceid = get_target_info(text_uri)
72
+ "#{sourcedb}-#{sourceid}"
75
73
  end
76
74
 
77
75
  # denotations and relations
@@ -153,37 +151,29 @@ class TAO::RDFizer
153
151
  end
154
152
  end
155
153
 
156
- _spans.sort!{|a, b| (a[:begin] <=> b[:begin]).nonzero? || b[:end] <=> a[:end]}
154
+ _spans.sort_by! { |s| [s[:begin], -s[:end]] }
157
155
 
158
156
  ## begin indexing
159
157
  len = text.length
160
158
  num = _spans.length
161
159
 
162
- # initilaize the index
163
- (0 ... num).each do |i|
164
- _spans[i][:followings] = []
165
- _spans[i][:children] = []
166
- end
160
+ # create the indexes for followings and children
161
+ _spans.each_with_index do |span, i|
162
+ span[:followings] = []
163
+ span[:children] = []
167
164
 
168
- (0 ... num).each do |i|
169
- # find the first following span
170
165
  j = i + 1
171
-
172
- while j < num && _spans[j][:begin] < _spans[i][:end]
173
- unless include_parent?(_spans[i][:children], _spans[j])
174
- _spans[i][:children] << _spans[j]
175
- end
166
+ while j < num && _spans[j][:begin] < span[:end]
167
+ span[:children] << _spans[j] unless include_parent?(span[:children], _spans[j])
176
168
  j += 1
177
169
  end
178
170
 
179
- # find adjacent positions
180
- fp = _spans[i][:end]
171
+ fp = span[:end]
181
172
  fp += 1 while fp < len && text[fp].match(/\s/)
182
173
  next if fp == len
183
174
 
184
- # index adjacent spans
185
175
  while j < num && _spans[j][:begin] == fp
186
- _spans[i][:followings] << _spans[j]
176
+ span[:followings] << _spans[j]
187
177
  j += 1
188
178
  end
189
179
  end
@@ -206,28 +196,26 @@ class TAO::RDFizer
206
196
  def get_target_info (text_uri)
207
197
  sourcedb = (text_uri =~ %r|/sourcedb/([^/]+)|)? $1 : nil
208
198
  sourceid = (text_uri =~ %r|/sourceid/([^/]+)|)? $1 : nil
209
- divid = (text_uri =~ %r|/divs/([^/]+)|)? $1 : nil
210
199
 
211
- return sourcedb, sourceid, divid
200
+ return sourcedb, sourceid
212
201
  end
213
202
 
214
- def find_uri (label, namespaces, prefix_for_this)
203
+ def find_uri(label, namespaces, prefix_for_this)
204
+ @uri_cache ||= {}
205
+
206
+ return @uri_cache[label] if @uri_cache.key?(label)
207
+
215
208
  if label.match(/\s/)
216
- # raise ArgumentError, "A label including a whitespace character found: #{label}."
217
- label.gsub(/\s/, '_')
209
+ label.gsub!(/\s/, '_')
218
210
  end
219
211
  delimiter_position = label.index(':')
220
- if !delimiter_position.nil? && namespaces.keys.include?(label[0...delimiter_position])
221
- label.gsub('(', '\(').gsub(')', '\)') # brackets have to be escaped
212
+ if delimiter_position && namespaces.key?(label[0...delimiter_position])
213
+ @uri_cache[label] = label.gsub('(', '\(').gsub(')', '\)')
222
214
  elsif label =~ %r[^https?://]
223
- "<#{label}>"
215
+ @uri_cache[label] = "<#{label}>"
224
216
  else
225
- clabel = if label.match(/^\W+$/)
226
- 'SYM'
227
- else
228
- label.sub(/^\W+/, '').sub(/[^a-zA-Z0-9_)]+$/, '').gsub(/ +/, '_')
229
- end
230
- namespaces.has_key?('_base') ? "<#{clabel}>" : "#{prefix_for_this}:#{clabel.gsub('(', '\(').gsub(')', '\)')}"
217
+ clabel = label.match(/^\W+$/) ? 'SYM' : label.sub(/^\W+/, '').sub(/[^a-zA-Z0-9_)]+$/, '').gsub(/ +/, '_')
218
+ @uri_cache[label] = namespaces.key?('_base') ? "<#{clabel}>" : "#{prefix_for_this}:#{clabel.gsub('(', '\(').gsub(')', '\)')}"
231
219
  end
232
220
  end
233
221
 
@@ -274,7 +262,7 @@ class TAO::RDFizer
274
262
  # variable: namespaces
275
263
  ERB_PREFIXES_TTL = <<~HEREDOC
276
264
  @prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
277
- @prefix tao: <http://pubannotation.org/ontology/tao.owl#> .
265
+ @prefix tao: <https://pubannotation.org/ontology/tao.owl#> .
278
266
  <%# namespaces -%>
279
267
  <% namespaces.each_key do |p| -%>
280
268
  <% if p == '_base' -%>
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: tao_rdfizer
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.11.3
4
+ version: 0.11.4
5
5
  platform: ruby
6
6
  authors:
7
7
  - Jin-Dong Kim
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2021-05-23 00:00:00.000000000 Z
11
+ date: 2024-09-18 00:00:00.000000000 Z
12
12
  dependencies: []
13
13
  description: It uses TAO (text annotation ontology) for representation of annotations
14
14
  to text.
@@ -33,14 +33,14 @@ required_ruby_version: !ruby/object:Gem::Requirement
33
33
  requirements:
34
34
  - - ">="
35
35
  - !ruby/object:Gem::Version
36
- version: '0'
36
+ version: 3.0.0
37
37
  required_rubygems_version: !ruby/object:Gem::Requirement
38
38
  requirements:
39
39
  - - ">="
40
40
  - !ruby/object:Gem::Version
41
41
  version: '0'
42
42
  requirements: []
43
- rubygems_version: 3.0.9
43
+ rubygems_version: 3.5.11
44
44
  signing_key:
45
45
  specification_version: 4
46
46
  summary: A RDF statement generator for annotations in the PubAnnotation JSON format.