tao_rdfizer 0.11.3 → 0.12

This diff shows the changes between two publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
Files changed (3)
  1. checksums.yaml +4 -4
  2. data/lib/tao_rdfizer/tao_rdfizer.rb +45 -55
  3. metadata +4 -4
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 122108e353debbc7d153a9aaa4f4a7b1d0d964ab1f24697443d2e65977e61efa
4
- data.tar.gz: 2a62939c0ac1bf15e1225a1512d3d797c379ddb563bfeb422996c752b1865f61
3
+ metadata.gz: 9f8e898ea335e5047e9b98c5f61d91f746ae35cdeae4e74c0fea729533f6c4a8
4
+ data.tar.gz: 9718f84c225451729468700d5ffc54952150de9f178c713b6bb82a13b440dfca
5
5
  SHA512:
6
- metadata.gz: 6b25ad6199e58d03b7e8acdf4c60a8a8c9c87b634940edddebb73b696a5e4bb37b96b7d74cb818acb93b951ea5977ae72ec8273c3ec264d3c73d6e381efe1092
7
- data.tar.gz: 5085e04e204ef0eb8f1282e0f659a2cc60eae7ef70283c658072b310683f016cf2c1806ff71cad66311950b246eb99a402e502d799f998acd34434ea9d3bc4ff
6
+ metadata.gz: 53c9ab169c713eb9cfaef0199c6497e39ba33ca4b9645699b2217376a266fa25fd60d70a97e47db14b10429760b6618614c140c170a60d85caf065ca3c3d8498
7
+ data.tar.gz: 5e9673f10325c1e10044be8d9c89819b21b60b056b0a9591eed83290763d038aefe49906a201022722a31b7bf8b50990b59abd2fd8439d7d1d00da48784075cc
data/lib/tao_rdfizer/tao_rdfizer.rb CHANGED
@@ -14,14 +14,14 @@ class TAO::RDFizer
14
14
  else
15
15
  ERB_ANNOTATIONS_TTL
16
16
  end
17
- @tao_ttl_erb = ERB.new(template, nil, '-')
18
- @prefix_ttl_erb = ERB.new(ERB_PREFIXES_TTL, nil, '-')
17
+ @tao_ttl_erb = ERB.new(template, trim_mode: '-')
18
+ @prefix_ttl_erb = ERB.new(ERB_PREFIXES_TTL, trim_mode: '-')
19
19
  end
20
20
 
21
21
  def rdfize(annotations_col, options = nil)
22
22
  options ||= {}
23
- only_prefixes = options.has_key?(:only_prefixes) ? options[:only_prefixes] == true : false
24
- with_prefixes = options.has_key?(:with_prefixes) ? options[:with_prefixes] == true : true
23
+ only_prefixes = options[:only_prefixes] == true
24
+ with_prefixes = options[:with_prefixes] != true
25
25
 
26
26
  # check the format
27
27
  annotations_col.each do |annotations|
@@ -29,35 +29,33 @@ class TAO::RDFizer
29
29
  end
30
30
 
31
31
  # namespaces
32
- namespaces = {}
33
-
34
- anns = annotations_col.first
35
- anns[:namespaces].each {|n| namespaces[n[:prefix]] = n[:uri]} unless anns[:namespaces].nil?
32
+ namespaces = get_namespaces(annotations_col.first)
36
33
 
37
34
  prefixes_ttl = @prefix_ttl_erb.result_with_hash(namespaces:namespaces) if only_prefixes || with_prefixes
38
35
 
39
- if only_prefixes
40
- prefixes_ttl
41
- else
42
- annotations_ttl = get_annotations_ttl(annotations_col, namespaces)
43
- if with_prefixes
44
- prefixes_ttl + annotations_ttl
45
- else
46
- annotations_ttl
47
- end
48
- end
36
+ return prefixes_ttl if only_prefixes
37
+
38
+ annotations_ttl = get_annotations_ttl(annotations_col, namespaces)
39
+ with_prefixes ? prefixes_ttl + annotations_ttl : annotations_ttl
49
40
  end
50
41
 
51
42
  private
52
43
 
53
- def get_annotations_ttl(annotations_col, namespaces)
54
- anns = annotations_col.first
44
+ def get_namespaces(annotations)
45
+ return {} if annotations[:namespaces].nil?
46
+
47
+ annotations[:namespaces].each_with_object({}) do |n, namespaces|
48
+ namespaces[n[:prefix]] = n[:uri]
49
+ end
50
+ end
55
51
 
52
+ def get_annotations_ttl(annotations_col, namespaces)
56
53
  unless @mode ==:spans
54
+ anns = annotations_col.first
57
55
  raise ArgumentError, "A project name has to be specified." unless anns.has_key?(:project)
58
56
  prefix_for_this = anns[:project].downcase.gsub(/ /, '_')
59
57
  raise ArgumentError, "'#{prefix_for_this}' is a reserved prefix for this project." if namespaces.has_key?(prefix_for_this)
60
- project_uri = 'http://pubannotation.org/projects/' + anns[:project]
58
+ project_uri = 'https://pubannotation.org/projects/' + anns[:project]
61
59
  namespaces[prefix_for_this] = project_uri + '/'
62
60
  end
63
61
 
@@ -70,12 +68,14 @@ class TAO::RDFizer
70
68
  text = annotations[:text]
71
69
  text_uri = annotations[:target]
72
70
  text_id = begin
73
- sourcedb, sourceid, divid = get_target_info(text_uri)
74
- divid.nil? ? "#{sourcedb}-#{sourceid}" : "#{sourcedb}-#{sourceid}-#{divid}"
71
+ sourcedb, sourceid = get_target_info(text_uri)
72
+ "#{sourcedb}-#{sourceid}"
75
73
  end
76
74
 
77
75
  # denotations and relations
78
76
  _denotations = annotations[:denotations] || []
77
+ _blocks = annotations[:blocks] || []
78
+ _denotations += _blocks
79
79
  _attributes = annotations[:attributes] || []
80
80
  _relations = annotations[:relations] || []
81
81
  if @mode == :spans && annotations.has_key?(:tracks)
@@ -153,37 +153,29 @@ class TAO::RDFizer
153
153
  end
154
154
  end
155
155
 
156
- _spans.sort!{|a, b| (a[:begin] <=> b[:begin]).nonzero? || b[:end] <=> a[:end]}
156
+ _spans.sort_by! { |s| [s[:begin], -s[:end]] }
157
157
 
158
158
  ## begin indexing
159
159
  len = text.length
160
160
  num = _spans.length
161
161
 
162
- # initilaize the index
163
- (0 ... num).each do |i|
164
- _spans[i][:followings] = []
165
- _spans[i][:children] = []
166
- end
162
+ # create the indexes for followings and children
163
+ _spans.each_with_index do |span, i|
164
+ span[:followings] = []
165
+ span[:children] = []
167
166
 
168
- (0 ... num).each do |i|
169
- # find the first following span
170
167
  j = i + 1
171
-
172
- while j < num && _spans[j][:begin] < _spans[i][:end]
173
- unless include_parent?(_spans[i][:children], _spans[j])
174
- _spans[i][:children] << _spans[j]
175
- end
168
+ while j < num && _spans[j][:begin] < span[:end]
169
+ span[:children] << _spans[j] unless include_parent?(span[:children], _spans[j])
176
170
  j += 1
177
171
  end
178
172
 
179
- # find adjacent positions
180
- fp = _spans[i][:end]
173
+ fp = span[:end]
181
174
  fp += 1 while fp < len && text[fp].match(/\s/)
182
175
  next if fp == len
183
176
 
184
- # index adjacent spans
185
177
  while j < num && _spans[j][:begin] == fp
186
- _spans[i][:followings] << _spans[j]
178
+ span[:followings] << _spans[j]
187
179
  j += 1
188
180
  end
189
181
  end
@@ -206,28 +198,26 @@ class TAO::RDFizer
206
198
  def get_target_info (text_uri)
207
199
  sourcedb = (text_uri =~ %r|/sourcedb/([^/]+)|)? $1 : nil
208
200
  sourceid = (text_uri =~ %r|/sourceid/([^/]+)|)? $1 : nil
209
- divid = (text_uri =~ %r|/divs/([^/]+)|)? $1 : nil
210
201
 
211
- return sourcedb, sourceid, divid
202
+ return sourcedb, sourceid
212
203
  end
213
204
 
214
- def find_uri (label, namespaces, prefix_for_this)
205
+ def find_uri(label, namespaces, prefix_for_this)
206
+ @uri_cache ||= {}
207
+
208
+ return @uri_cache[label] if @uri_cache.key?(label)
209
+
215
210
  if label.match(/\s/)
216
- # raise ArgumentError, "A label including a whitespace character found: #{label}."
217
- label.gsub(/\s/, '_')
211
+ label.gsub!(/\s/, '_')
218
212
  end
219
213
  delimiter_position = label.index(':')
220
- if !delimiter_position.nil? && namespaces.keys.include?(label[0...delimiter_position])
221
- label.gsub('(', '\(').gsub(')', '\)') # brackets have to be escaped
214
+ if delimiter_position && namespaces.key?(label[0...delimiter_position])
215
+ @uri_cache[label] = label.gsub('(', '\(').gsub(')', '\)')
222
216
  elsif label =~ %r[^https?://]
223
- "<#{label}>"
217
+ @uri_cache[label] = "<#{label}>"
224
218
  else
225
- clabel = if label.match(/^\W+$/)
226
- 'SYM'
227
- else
228
- label.sub(/^\W+/, '').sub(/[^a-zA-Z0-9_)]+$/, '').gsub(/ +/, '_')
229
- end
230
- namespaces.has_key?('_base') ? "<#{clabel}>" : "#{prefix_for_this}:#{clabel.gsub('(', '\(').gsub(')', '\)')}"
219
+ clabel = label.match(/^\W+$/) ? 'SYM' : label.sub(/^\W+/, '').sub(/[^a-zA-Z0-9_)]+$/, '').gsub(/ +/, '_')
220
+ @uri_cache[label] = namespaces.key?('_base') ? "<#{clabel}>" : "#{prefix_for_this}:#{clabel.gsub('(', '\(').gsub(')', '\)')}"
231
221
  end
232
222
  end
233
223
 
@@ -274,7 +264,7 @@ class TAO::RDFizer
274
264
  # variable: namespaces
275
265
  ERB_PREFIXES_TTL = <<~HEREDOC
276
266
  @prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
277
- @prefix tao: <http://pubannotation.org/ontology/tao.owl#> .
267
+ @prefix tao: <https://pubannotation.org/ontology/tao.owl#> .
278
268
  <%# namespaces -%>
279
269
  <% namespaces.each_key do |p| -%>
280
270
  <% if p == '_base' -%>
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: tao_rdfizer
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.11.3
4
+ version: '0.12'
5
5
  platform: ruby
6
6
  authors:
7
7
  - Jin-Dong Kim
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2021-05-23 00:00:00.000000000 Z
11
+ date: 2024-12-01 00:00:00.000000000 Z
12
12
  dependencies: []
13
13
  description: It uses TAO (text annotation ontology) for representation of annotations
14
14
  to text.
@@ -33,14 +33,14 @@ required_ruby_version: !ruby/object:Gem::Requirement
33
33
  requirements:
34
34
  - - ">="
35
35
  - !ruby/object:Gem::Version
36
- version: '0'
36
+ version: 3.0.0
37
37
  required_rubygems_version: !ruby/object:Gem::Requirement
38
38
  requirements:
39
39
  - - ">="
40
40
  - !ruby/object:Gem::Version
41
41
  version: '0'
42
42
  requirements: []
43
- rubygems_version: 3.0.9
43
+ rubygems_version: 3.5.11
44
44
  signing_key:
45
45
  specification_version: 4
46
46
  summary: A RDF statement generator for annotations in the PubAnnotation JSON format.