tao_rdfizer 0.10 → 0.11.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: a2f897b03c9e6856aaf752fd645808c47e6d52422ea1b6934b44b694d8d0d9a1
4
- data.tar.gz: 8d9fb426258bf3fceabb6471a81fcaeb99cd18dcace251575a0f2f715fa4d787
3
+ metadata.gz: ef68cf0cfd37026eda3abfb19f294d866c343294c1a9cc8f4ed2aa9af1f1443c
4
+ data.tar.gz: fdde442909c60d0dbfe81ea445b049a52829752d01b06f5f599786071b09b62c
5
5
  SHA512:
6
- metadata.gz: f0d230f25945bf45aed05f8f0cb3d46113d5eb4213b17ce4812cbfbcfa5636d540343b8ee8fe94cf1bbbc1a1eb4db35ac076bf1d7cd3cb7abdd92db446c94f41
7
- data.tar.gz: 6f00c1aeb735b8e76da2e9185ad141c0c29c1d45d94faad1b3d89ec903a1c931e22957f4f356dce24ff6beadd53a77ac7fcd5aad2c17cb281a5d0128cd062b89
6
+ metadata.gz: a0219d82d900259fd8e7dab5ea5382bc06bb504ea21207302b21b59047f33ba899491aa53043a9456af1c9e1693d9a957223a9cad31d8a2e21f4df60aebdbf4e
7
+ data.tar.gz: 5250d9a3daf7716e5c8f2e01fe2560589862fb3b7cdf7a03c067f07e47930f91b6912b7b9316d8de652fcf983f677e1ab0d9de16954f2fdf3880478e7f50344a
data/bin/tao_rdfizer CHANGED
@@ -3,6 +3,7 @@ require 'tao_rdfizer'
3
3
  require 'json'
4
4
 
5
5
  mode = nil
6
+ options = {}
6
7
 
7
8
  ## command line option processing
8
9
  require 'optparse'
@@ -17,6 +18,14 @@ optparse = OptionParser.new do |opts|
17
18
  mode = :spans
18
19
  end
19
20
 
21
+ opts.on('-x', '--x-prefixes', 'without prefixes.') do
22
+ options[:with_prefixes] = false
23
+ end
24
+
25
+ opts.on('-o', '--only-prefixes', 'only prefixes.') do
26
+ options[:only_prefixes] = true
27
+ end
28
+
20
29
  opts.on('-h', '--help', 'displays this screen.') do
21
30
  puts opts
22
31
  exit
@@ -34,7 +43,7 @@ begin
34
43
  annotations = JSON.parse File.read(ARGV[0]), :symbolize_names => true
35
44
  annotations = [annotations] unless annotations.class == Array
36
45
  rdfizer = TAO::RDFizer.new(mode)
37
- puts rdfizer.rdfize(annotations)
46
+ puts rdfizer.rdfize(annotations, options)
38
47
  rescue ArgumentError, IOError => e
39
48
  puts e.message
40
49
  end
@@ -14,18 +14,45 @@ class TAO::RDFizer
14
14
  else
15
15
  ERB_ANNOTATIONS_TTL
16
16
  end
17
-
18
17
  @tao_ttl_erb = ERB.new(template, nil, '-')
19
18
  @prefix_ttl_erb = ERB.new(ERB_PREFIXES_TTL, nil, '-')
20
19
  end
21
20
 
22
- def rdfize(annotations_col)
21
# Renders a collection of annotation sets as TTL text.
#
# @param annotations_col [Array<Hash>] annotation sets with symbol keys.
#   Every element must have a :target key. Namespaces are taken from the
#   optional :namespaces list (hashes with :prefix and :uri) of the FIRST
#   element only.
# @param options [Hash, nil] rendering switches (nil is treated as {}):
#   :only_prefixes - when exactly true, return only the prefix declarations;
#   :with_prefixes - defaults to true; when given and not exactly true, the
#                    prefix declarations are omitted from the output.
# @return [String] the prefix declarations, the annotation statements, or
#   both concatenated (prefixes first), depending on the options.
# @raise [ArgumentError] if any annotation set lacks the :target key.
def rdfize(annotations_col, options = nil)
  options ||= {}
  # Only a literal `true` switches an option on.
  only_prefixes = options[:only_prefixes] == true
  with_prefixes = options.fetch(:with_prefixes, true) == true

  # check the format
  # ArgumentError keeps this in line with the other validation failures raised
  # by this class, so callers rescuing ArgumentError also see this one.
  annotations_col.each do |annotations|
    raise ArgumentError, "'target' is missing" unless annotations.key?(:target)
  end

  # namespaces
  namespaces = {}
  anns = annotations_col.first
  anns[:namespaces]&.each { |n| namespaces[n[:prefix]] = n[:uri] }

  # The prefix block is rendered only when it will be part of the result.
  prefixes_ttl = @prefix_ttl_erb.result_with_hash(namespaces: namespaces) if only_prefixes || with_prefixes
  return prefixes_ttl if only_prefixes

  annotations_ttl = get_annotations_ttl(annotations_col, namespaces)
  with_prefixes ? prefixes_ttl + annotations_ttl : annotations_ttl
end
50
+
51
+ private
52
+
53
+ def get_annotations_ttl(annotations_col, namespaces)
54
+ anns = annotations_col.first
55
+
29
56
  unless @mode ==:spans
30
57
  raise ArgumentError, "A project name has to be specified." unless anns.has_key?(:project)
31
58
  prefix_for_this = anns[:project].downcase.gsub(/ /, '_')
@@ -53,14 +80,14 @@ class TAO::RDFizer
53
80
  _relations = annotations[:relations] || []
54
81
  if @mode == :spans && annotations.has_key?(:tracks)
55
82
  annotations[:tracks].each do |track|
56
- _denotations += track[:denotations]
57
- _attributes += track[:attributes]
58
- _relations += track[:relations]
83
+ _denotations += track[:denotations] if track.has_key? :denotations
84
+ _attributes += track[:attributes] if track.has_key? :attributes
85
+ _relations += track[:relations] if track.has_key? :relations
59
86
  end
60
87
  end
61
88
 
62
89
  begin
63
- if @mode == :annotations
90
+ unless @mode == :span
64
91
  # index attributes
65
92
  attributesh = _attributes.inject({}) do |h, a|
66
93
  if a[:pred].end_with?('_id')
@@ -76,7 +103,7 @@ class TAO::RDFizer
76
103
  span_uri = "<#{text_uri}/spans/#{d[:span][:begin]}-#{d[:span][:end]}>"
77
104
  d[:span_uri] = span_uri
78
105
  d[:obj_uri] = "#{prefix_for_this}:#{text_id}-#{d[:id]}"
79
- class_uris = attributesh[d[:id]].push(d[:obj])
106
+ class_uris = (attributesh[d[:id]] || []).push(d[:obj])
80
107
  d[:class_uris] = class_uris.map{|uri| find_uri(uri, namespaces, prefix_for_this)}
81
108
  rescue ArgumentError => e
82
109
  raise ArgumentError, "[#{sourcedb}-#{sourceid}-#{d[:id]}] " + e.message
@@ -167,11 +194,9 @@ class TAO::RDFizer
167
194
  spans += _spans unless @mode == :annotations
168
195
  end
169
196
 
170
- ttl = @prefix_ttl_erb.result(binding) + @tao_ttl_erb.result(binding)
197
+ @tao_ttl_erb.result(binding)
171
198
  end
172
199
 
173
- private
174
-
175
200
  def include_parent?(spans, span)
176
201
  # spans.each{|s| return true if (s[:begin] <= span[:begin] && s[:end] > span[:end]) || (s[:begin] < span[:begin] && s[:end] >= span[:end])}
177
202
  spans.each{|s| return true if s[:begin] <= span[:begin] && s[:end] >= span[:end]}
@@ -187,7 +212,10 @@ class TAO::RDFizer
187
212
  end
188
213
 
189
214
  def find_uri (label, namespaces, prefix_for_this)
190
- raise ArgumentError, "A label including a whitespace character found: #{label}." if label.match(/\s/)
215
+ if label.match(/\s/)
216
+ # raise ArgumentError, "A label including a whitespace character found: #{label}."
217
+ label.gsub(/\s/, '_')
218
+ end
191
219
  delimiter_position = label.index(':')
192
220
  if !delimiter_position.nil? && namespaces.keys.include?(label[0...delimiter_position])
193
221
  label.gsub('(', '\(').gsub(')', '\)') # brackets have to be escaped
@@ -203,6 +231,18 @@ class TAO::RDFizer
203
231
  end
204
232
  end
205
233
 
234
# Escapes a string so it can be placed between double quotes as a literal
# in the generated TTL.
#
# Backslash, tab, backspace, newline, carriage return, form feed and the
# double quote are each replaced by their backslash escape sequence.
#
# @param string [String] raw text
# @return [String] a new, frozen, escaped copy of the text
def rdf_literal_escape(string)
  escapes = {
    '\\' => '\\\\',
    "\t" => '\\t',
    "\b" => '\\b',
    "\n" => '\\n',
    "\r" => '\\r',
    "\f" => '\\f',
    '"' => '\\"'
  }
  # A hash replacement is inserted verbatim. With a plain replacement string,
  # gsub collapses a `\\` pair into a single backslash, which would leave
  # backslashes in the input unescaped. A single pass also guarantees that
  # backslashes introduced by one escape are never escaped a second time.
  string.gsub(Regexp.union(escapes.keys), escapes).freeze
end
244
+
245
+ # variable: denotations, relations
206
246
  ERB_ANNOTATIONS_TTL = <<~HEREDOC
207
247
  <% denotations.each do |d| -%>
208
248
  <%= d[:obj_uri] %> tao:denoted_by <%= d[:span_uri] %> ;
@@ -214,6 +254,7 @@ class TAO::RDFizer
214
254
  <% end -%>
215
255
  HEREDOC
216
256
 
257
+ # variable: spans
217
258
  ERB_SPANS_TTL = <<~HEREDOC
218
259
  <% spans.each do |s| -%>
219
260
  <%= s[:span_uri] %> rdf:type tao:Text_span ;
@@ -223,13 +264,14 @@ class TAO::RDFizer
223
264
  <% s[:children].each do |s| -%>
224
265
  tao:contains <%= s[:span_uri] %> ;
225
266
  <% end -%>
226
- tao:has_text <%= s[:text].inspect %> ;
267
+ tao:has_text "<%= rdf_literal_escape(s[:text]) %>" ;
227
268
  tao:belongs_to <<%= s[:source_uri] %>> ;
228
269
  tao:begins_at <%= s[:begin] %> ;
229
270
  tao:ends_at <%= s[:end] %> .
230
271
  <% end -%>
231
272
  HEREDOC
232
273
 
274
+ # variable: namespaces
233
275
  ERB_PREFIXES_TTL = <<~HEREDOC
234
276
  @prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
235
277
  @prefix tao: <http://pubannotation.org/ontology/tao.owl#> .
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: tao_rdfizer
3
3
  version: !ruby/object:Gem::Version
4
- version: '0.10'
4
+ version: 0.11.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Jin-Dong Kim
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2020-12-20 00:00:00.000000000 Z
11
+ date: 2021-05-23 00:00:00.000000000 Z
12
12
  dependencies: []
13
13
  description: It uses TAO (text annotation ontology) for representation of annotations
14
14
  to text.
@@ -40,7 +40,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
40
40
  - !ruby/object:Gem::Version
41
41
  version: '0'
42
42
  requirements: []
43
- rubygems_version: 3.0.8
43
+ rubygems_version: 3.0.9
44
44
  signing_key:
45
45
  specification_version: 4
46
46
  summary: A RDF statement generator for annotations in the PubAnnotation JSON format.