tao_rdfizer 0.10 → 0.11.2

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: a2f897b03c9e6856aaf752fd645808c47e6d52422ea1b6934b44b694d8d0d9a1
4
- data.tar.gz: 8d9fb426258bf3fceabb6471a81fcaeb99cd18dcace251575a0f2f715fa4d787
3
+ metadata.gz: ef68cf0cfd37026eda3abfb19f294d866c343294c1a9cc8f4ed2aa9af1f1443c
4
+ data.tar.gz: fdde442909c60d0dbfe81ea445b049a52829752d01b06f5f599786071b09b62c
5
5
  SHA512:
6
- metadata.gz: f0d230f25945bf45aed05f8f0cb3d46113d5eb4213b17ce4812cbfbcfa5636d540343b8ee8fe94cf1bbbc1a1eb4db35ac076bf1d7cd3cb7abdd92db446c94f41
7
- data.tar.gz: 6f00c1aeb735b8e76da2e9185ad141c0c29c1d45d94faad1b3d89ec903a1c931e22957f4f356dce24ff6beadd53a77ac7fcd5aad2c17cb281a5d0128cd062b89
6
+ metadata.gz: a0219d82d900259fd8e7dab5ea5382bc06bb504ea21207302b21b59047f33ba899491aa53043a9456af1c9e1693d9a957223a9cad31d8a2e21f4df60aebdbf4e
7
+ data.tar.gz: 5250d9a3daf7716e5c8f2e01fe2560589862fb3b7cdf7a03c067f07e47930f91b6912b7b9316d8de652fcf983f677e1ab0d9de16954f2fdf3880478e7f50344a
data/bin/tao_rdfizer CHANGED
@@ -3,6 +3,7 @@ require 'tao_rdfizer'
3
3
  require 'json'
4
4
 
5
5
  mode = nil
6
+ options = {}
6
7
 
7
8
  ## command line option processing
8
9
  require 'optparse'
@@ -17,6 +18,14 @@ optparse = OptionParser.new do |opts|
17
18
  mode = :spans
18
19
  end
19
20
 
21
+ opts.on('-x', '--x-prefixes', 'without prefixes.') do
22
+ options[:with_prefixes] = false
23
+ end
24
+
25
+ opts.on('-o', '--only-prefixes', 'only prefixes.') do
26
+ options[:only_prefixes] = true
27
+ end
28
+
20
29
  opts.on('-h', '--help', 'displays this screen.') do
21
30
  puts opts
22
31
  exit
@@ -34,7 +43,7 @@ begin
34
43
  annotations = JSON.parse File.read(ARGV[0]), :symbolize_names => true
35
44
  annotations = [annotations] unless annotations.class == Array
36
45
  rdfizer = TAO::RDFizer.new(mode)
37
- puts rdfizer.rdfize(annotations)
46
+ puts rdfizer.rdfize(annotations, options)
38
47
  rescue ArgumentError, IOError => e
39
48
  puts e.message
40
49
  end
@@ -14,18 +14,45 @@ class TAO::RDFizer
14
14
  else
15
15
  ERB_ANNOTATIONS_TTL
16
16
  end
17
-
18
17
  @tao_ttl_erb = ERB.new(template, nil, '-')
19
18
  @prefix_ttl_erb = ERB.new(ERB_PREFIXES_TTL, nil, '-')
20
19
  end
21
20
 
22
- def rdfize(annotations_col)
21
+ def rdfize(annotations_col, options = nil)
22
+ options ||= {}
23
+ only_prefixes = options.has_key?(:only_prefixes) ? options[:only_prefixes] == true : false
24
+ with_prefixes = options.has_key?(:with_prefixes) ? options[:with_prefixes] == true : true
25
+
26
+ # check the format
27
+ annotations_col.each do |annotations|
28
+ raise "'target' is missing" unless annotations.has_key? :target
29
+ end
30
+
23
31
  # namespaces
24
32
  namespaces = {}
25
33
 
26
34
  anns = annotations_col.first
27
35
  anns[:namespaces].each {|n| namespaces[n[:prefix]] = n[:uri]} unless anns[:namespaces].nil?
28
36
 
37
+ prefixes_ttl = @prefix_ttl_erb.result_with_hash(namespaces:namespaces) if only_prefixes || with_prefixes
38
+
39
+ if only_prefixes
40
+ prefixes_ttl
41
+ else
42
+ annotations_ttl = get_annotations_ttl(annotations_col, namespaces)
43
+ if with_prefixes
44
+ prefixes_ttl + annotations_ttl
45
+ else
46
+ annotations_ttl
47
+ end
48
+ end
49
+ end
50
+
51
+ private
52
+
53
+ def get_annotations_ttl(annotations_col, namespaces)
54
+ anns = annotations_col.first
55
+
29
56
  unless @mode ==:spans
30
57
  raise ArgumentError, "A project name has to be specified." unless anns.has_key?(:project)
31
58
  prefix_for_this = anns[:project].downcase.gsub(/ /, '_')
@@ -53,14 +80,14 @@ class TAO::RDFizer
53
80
  _relations = annotations[:relations] || []
54
81
  if @mode == :spans && annotations.has_key?(:tracks)
55
82
  annotations[:tracks].each do |track|
56
- _denotations += track[:denotations]
57
- _attributes += track[:attributes]
58
- _relations += track[:relations]
83
+ _denotations += track[:denotations] if track.has_key? :denotations
84
+ _attributes += track[:attributes] if track.has_key? :attributes
85
+ _relations += track[:relations] if track.has_key? :relations
59
86
  end
60
87
  end
61
88
 
62
89
  begin
63
- if @mode == :annotations
90
+ unless @mode == :span
64
91
  # index attributes
65
92
  attributesh = _attributes.inject({}) do |h, a|
66
93
  if a[:pred].end_with?('_id')
@@ -76,7 +103,7 @@ class TAO::RDFizer
76
103
  span_uri = "<#{text_uri}/spans/#{d[:span][:begin]}-#{d[:span][:end]}>"
77
104
  d[:span_uri] = span_uri
78
105
  d[:obj_uri] = "#{prefix_for_this}:#{text_id}-#{d[:id]}"
79
- class_uris = attributesh[d[:id]].push(d[:obj])
106
+ class_uris = (attributesh[d[:id]] || []).push(d[:obj])
80
107
  d[:class_uris] = class_uris.map{|uri| find_uri(uri, namespaces, prefix_for_this)}
81
108
  rescue ArgumentError => e
82
109
  raise ArgumentError, "[#{sourcedb}-#{sourceid}-#{d[:id]}] " + e.message
@@ -167,11 +194,9 @@ class TAO::RDFizer
167
194
  spans += _spans unless @mode == :annotations
168
195
  end
169
196
 
170
- ttl = @prefix_ttl_erb.result(binding) + @tao_ttl_erb.result(binding)
197
+ @tao_ttl_erb.result(binding)
171
198
  end
172
199
 
173
- private
174
-
175
200
  def include_parent?(spans, span)
176
201
  # spans.each{|s| return true if (s[:begin] <= span[:begin] && s[:end] > span[:end]) || (s[:begin] < span[:begin] && s[:end] >= span[:end])}
177
202
  spans.each{|s| return true if s[:begin] <= span[:begin] && s[:end] >= span[:end]}
@@ -187,7 +212,10 @@ class TAO::RDFizer
187
212
  end
188
213
 
189
214
  def find_uri (label, namespaces, prefix_for_this)
190
- raise ArgumentError, "A label including a whitespace character found: #{label}." if label.match(/\s/)
215
+ if label.match(/\s/)
216
+ # raise ArgumentError, "A label including a whitespace character found: #{label}."
217
+ label.gsub(/\s/, '_')
218
+ end
191
219
  delimiter_position = label.index(':')
192
220
  if !delimiter_position.nil? && namespaces.keys.include?(label[0...delimiter_position])
193
221
  label.gsub('(', '\(').gsub(')', '\)') # brackets have to be escaped
@@ -203,6 +231,18 @@ class TAO::RDFizer
203
231
  end
204
232
  end
205
233
 
234
+ def rdf_literal_escape(string)
235
+ string.gsub('\\', '\\\\').
236
+ gsub("\t", '\\t').
237
+ gsub("\b", '\\b').
238
+ gsub("\n", '\\n').
239
+ gsub("\r", '\\r').
240
+ gsub("\f", '\\f').
241
+ gsub('"', '\\"').
242
+ freeze
243
+ end
244
+
245
+ # variable: denotations, relations
206
246
  ERB_ANNOTATIONS_TTL = <<~HEREDOC
207
247
  <% denotations.each do |d| -%>
208
248
  <%= d[:obj_uri] %> tao:denoted_by <%= d[:span_uri] %> ;
@@ -214,6 +254,7 @@ class TAO::RDFizer
214
254
  <% end -%>
215
255
  HEREDOC
216
256
 
257
+ # variable: spans
217
258
  ERB_SPANS_TTL = <<~HEREDOC
218
259
  <% spans.each do |s| -%>
219
260
  <%= s[:span_uri] %> rdf:type tao:Text_span ;
@@ -223,13 +264,14 @@ class TAO::RDFizer
223
264
  <% s[:children].each do |s| -%>
224
265
  tao:contains <%= s[:span_uri] %> ;
225
266
  <% end -%>
226
- tao:has_text <%= s[:text].inspect %> ;
267
+ tao:has_text "<%= rdf_literal_escape(s[:text]) %>" ;
227
268
  tao:belongs_to <<%= s[:source_uri] %>> ;
228
269
  tao:begins_at <%= s[:begin] %> ;
229
270
  tao:ends_at <%= s[:end] %> .
230
271
  <% end -%>
231
272
  HEREDOC
232
273
 
274
+ # variable: namespaces
233
275
  ERB_PREFIXES_TTL = <<~HEREDOC
234
276
  @prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
235
277
  @prefix tao: <http://pubannotation.org/ontology/tao.owl#> .
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: tao_rdfizer
3
3
  version: !ruby/object:Gem::Version
4
- version: '0.10'
4
+ version: 0.11.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Jin-Dong Kim
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2020-12-20 00:00:00.000000000 Z
11
+ date: 2021-05-23 00:00:00.000000000 Z
12
12
  dependencies: []
13
13
  description: It uses TAO (text annotation ontology) for representation of annotations
14
14
  to text.
@@ -40,7 +40,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
40
40
  - !ruby/object:Gem::Version
41
41
  version: '0'
42
42
  requirements: []
43
- rubygems_version: 3.0.8
43
+ rubygems_version: 3.0.9
44
44
  signing_key:
45
45
  specification_version: 4
46
46
  summary: A RDF statement generator for annotations in the PubAnnotation JSON format.