tao_rdfizer 0.9.13 → 0.11.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: '00759ca80638a1d6f0d730af7c919ccf51c1828a614158987ffe4f5d99ff9b9a'
4
- data.tar.gz: bfb0c62f1e6e39ec615323d67edc36b16fa41ce0d7cb440e25a7f3d10013573b
3
+ metadata.gz: 95aaa9006671c09cfcf08d12876932c5783f3d784ad3855b4a3819ad79febf87
4
+ data.tar.gz: 6f3970db2a93208bd760c68651bc33a212d7e384396314deaebe1fdc744b20b0
5
5
  SHA512:
6
- metadata.gz: fe5fe5d6b21b68fe9a6e565641f8a9f1ed0c3ae10f714995a4a1f5282103246556b02570f5ac18d53d38d41f6dcbb04cb3c690382b2427f9b9e1d78f5055aacf
7
- data.tar.gz: 729279be8a26a908095195c90fb76900adc7f32b2cbbbd01ff7ef6e5115ec10c05c53fb8cd809dc7d3900d989c342301ddbdfea70eee6468fd8970f2aa69deca
6
+ metadata.gz: 6ffe933f1e7ecfc91b7e51d010fc84d42c5afabfc0cac59df717f264a45d337a4e67d5a2e56082cc00a67b9b20594156fd60fe51e700a4a78cef8c3d2c50fe20
7
+ data.tar.gz: efc71f7e1765dff987554031bbf4edcea8881cabf232ac5504b67fb33d4ac5b514b96f02410b2293b3a37b443508a4167786bd67cf696543802dcba150146299
data/bin/tao_rdfizer CHANGED
@@ -3,6 +3,7 @@ require 'tao_rdfizer'
3
3
  require 'json'
4
4
 
5
5
  mode = nil
6
+ options = {}
6
7
 
7
8
  ## command line option processing
8
9
  require 'optparse'
@@ -17,6 +18,14 @@ optparse = OptionParser.new do |opts|
17
18
  mode = :spans
18
19
  end
19
20
 
21
+ opts.on('-x', '--x-prefixes', 'without prefixes.') do
22
+ options[:with_prefixes] = false
23
+ end
24
+
25
+ opts.on('-o', '--only-prefixes', 'only prefixes.') do
26
+ options[:only_prefixes] = true
27
+ end
28
+
20
29
  opts.on('-h', '--help', 'displays this screen.') do
21
30
  puts opts
22
31
  exit
@@ -34,7 +43,7 @@ begin
34
43
  annotations = JSON.parse File.read(ARGV[0]), :symbolize_names => true
35
44
  annotations = [annotations] unless annotations.class == Array
36
45
  rdfizer = TAO::RDFizer.new(mode)
37
- puts rdfizer.rdfize(annotations)
46
+ puts rdfizer.rdfize(annotations, options)
38
47
  rescue ArgumentError, IOError => e
39
48
  puts e.message
40
49
  end
@@ -14,18 +14,45 @@ class TAO::RDFizer
14
14
  else
15
15
  ERB_ANNOTATIONS_TTL
16
16
  end
17
-
18
17
  @tao_ttl_erb = ERB.new(template, nil, '-')
19
18
  @prefix_ttl_erb = ERB.new(ERB_PREFIXES_TTL, nil, '-')
20
19
  end
21
20
 
22
- def rdfize(annotations_col)
21
+ def rdfize(annotations_col, options = nil)
22
+ options ||= {}
23
+ only_prefixes = options.has_key?(:only_prefixes) ? options[:only_prefixes] == true : false
24
+ with_prefixes = options.has_key?(:with_prefixes) ? options[:with_prefixes] == true : true
25
+
26
+ # check the format
27
+ annotations_col.each do |annotations|
28
+ raise "'target' is missing" unless annotations.has_key? :target
29
+ end
30
+
23
31
  # namespaces
24
32
  namespaces = {}
25
33
 
26
34
  anns = annotations_col.first
27
35
  anns[:namespaces].each {|n| namespaces[n[:prefix]] = n[:uri]} unless anns[:namespaces].nil?
28
36
 
37
+ prefixes_ttl = @prefix_ttl_erb.result_with_hash(namespaces:namespaces) if only_prefixes || with_prefixes
38
+
39
+ if only_prefixes
40
+ prefixes_ttl
41
+ else
42
+ annotations_ttl = get_annotations_ttl(annotations_col, namespaces)
43
+ if with_prefixes
44
+ prefixes_ttl + annotations_ttl
45
+ else
46
+ annotations_ttl
47
+ end
48
+ end
49
+ end
50
+
51
+ private
52
+
53
+ def get_annotations_ttl(annotations_col, namespaces)
54
+ anns = annotations_col.first
55
+
29
56
  unless @mode ==:spans
30
57
  raise ArgumentError, "A project name has to be specified." unless anns.has_key?(:project)
31
58
  prefix_for_this = anns[:project].downcase.gsub(/ /, '_')
@@ -35,6 +62,7 @@ class TAO::RDFizer
35
62
  end
36
63
 
37
64
  denotations = []
65
+ attributes = []
38
66
  relations = []
39
67
  spans = []
40
68
 
@@ -47,25 +75,36 @@ class TAO::RDFizer
47
75
  end
48
76
 
49
77
  # denotations and relations
50
- _denotations = annotations[:denotations]
51
- _relations = annotations[:relations]
52
- _denotations = [] if _denotations.nil?
53
- _relations = [] if _relations.nil?
78
+ _denotations = annotations[:denotations] || []
79
+ _attributes = annotations[:attributes] || []
80
+ _relations = annotations[:relations] || []
54
81
  if @mode == :spans && annotations.has_key?(:tracks)
55
82
  annotations[:tracks].each do |track|
56
- _denotations += track[:denotations]
57
- _relations += track[:relations]
83
+ _denotations += track[:denotations] if track.has_key? :denotations
84
+ _attributes += track[:attributes] if track.has_key? :attributes
85
+ _relations += track[:relations] if track.has_key? :relations
58
86
  end
59
87
  end
60
88
 
61
89
  begin
62
- if @mode == :annotations
90
+ unless @mode == :span
91
+ # index attributes
92
+ attributesh = _attributes.inject({}) do |h, a|
93
+ if a[:pred].end_with?('_id')
94
+ subj = a[:subj]
95
+ h[subj] = [] unless h.has_key? subj
96
+ h[subj] << a[:obj]
97
+ end
98
+ h
99
+ end
100
+
63
101
  # denotations preprocessing
64
102
  _denotations.each do |d|
65
103
  span_uri = "<#{text_uri}/spans/#{d[:span][:begin]}-#{d[:span][:end]}>"
66
104
  d[:span_uri] = span_uri
67
105
  d[:obj_uri] = "#{prefix_for_this}:#{text_id}-#{d[:id]}"
68
- d[:cls_uri] = find_uri(d[:obj], namespaces, prefix_for_this)
106
+ class_uris = (attributesh[d[:id]] || []).push(d[:obj])
107
+ d[:class_uris] = class_uris.map{|uri| find_uri(uri, namespaces, prefix_for_this)}
69
108
  rescue ArgumentError => e
70
109
  raise ArgumentError, "[#{sourcedb}-#{sourceid}-#{d[:id]}] " + e.message
71
110
  end
@@ -101,7 +140,7 @@ class TAO::RDFizer
101
140
  s[:text] = text[s[:begin] ... s[:end]]
102
141
  end
103
142
 
104
- # index
143
+ # index spans
105
144
  spanh = _spans.inject({}){|r, s| r[s[:span_uri]] = s; r}
106
145
 
107
146
  # add denotation information
@@ -155,11 +194,9 @@ class TAO::RDFizer
155
194
  spans += _spans unless @mode == :annotations
156
195
  end
157
196
 
158
- ttl = @prefix_ttl_erb.result(binding) + @tao_ttl_erb.result(binding)
197
+ @tao_ttl_erb.result(binding)
159
198
  end
160
199
 
161
- private
162
-
163
200
  def include_parent?(spans, span)
164
201
  # spans.each{|s| return true if (s[:begin] <= span[:begin] && s[:end] > span[:end]) || (s[:begin] < span[:begin] && s[:end] >= span[:end])}
165
202
  spans.each{|s| return true if s[:begin] <= span[:begin] && s[:end] >= span[:end]}
@@ -175,7 +212,10 @@ class TAO::RDFizer
175
212
  end
176
213
 
177
214
  def find_uri (label, namespaces, prefix_for_this)
178
- raise ArgumentError, "A label including a whitespace character found: #{label}." if label.match(/\s/)
215
+ if label.match(/\s/)
216
+ # raise ArgumentError, "A label including a whitespace character found: #{label}."
217
+ label.gsub(/\s/, '_')
218
+ end
179
219
  delimiter_position = label.index(':')
180
220
  if !delimiter_position.nil? && namespaces.keys.include?(label[0...delimiter_position])
181
221
  label.gsub('(', '\(').gsub(')', '\)') # brackets have to be escaped
@@ -191,10 +231,11 @@ class TAO::RDFizer
191
231
  end
192
232
  end
193
233
 
234
+ # variable: denotations, relations
194
235
  ERB_ANNOTATIONS_TTL = <<~HEREDOC
195
236
  <% denotations.each do |d| -%>
196
237
  <%= d[:obj_uri] %> tao:denoted_by <%= d[:span_uri] %> ;
197
- rdf:type <%= d[:cls_uri] %> .
238
+ <%= d[:class_uris].map{|c| "\trdf:type " + c}.join(" ;\n") + " ." %>
198
239
  <% end -%>
199
240
  <%# relations -%>
200
241
  <% relations.each do |r| -%>
@@ -202,6 +243,7 @@ class TAO::RDFizer
202
243
  <% end -%>
203
244
  HEREDOC
204
245
 
246
+ # variable: spans
205
247
  ERB_SPANS_TTL = <<~HEREDOC
206
248
  <% spans.each do |s| -%>
207
249
  <%= s[:span_uri] %> rdf:type tao:Text_span ;
@@ -218,6 +260,7 @@ class TAO::RDFizer
218
260
  <% end -%>
219
261
  HEREDOC
220
262
 
263
+ # variable: namespaces
221
264
  ERB_PREFIXES_TTL = <<~HEREDOC
222
265
  @prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
223
266
  @prefix tao: <http://pubannotation.org/ontology/tao.owl#> .
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: tao_rdfizer
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.9.13
4
+ version: 0.11.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Jin-Dong Kim
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2020-04-06 00:00:00.000000000 Z
11
+ date: 2021-05-23 00:00:00.000000000 Z
12
12
  dependencies: []
13
13
  description: It uses TAO (text annotation ontology) for representation of annotations
14
14
  to text.
@@ -40,8 +40,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
40
40
  - !ruby/object:Gem::Version
41
41
  version: '0'
42
42
  requirements: []
43
- rubyforge_project:
44
- rubygems_version: 2.7.9
43
+ rubygems_version: 3.0.9
45
44
  signing_key:
46
45
  specification_version: 4
47
46
  summary: A RDF statement generator for annotations in the PubAnnotation JSON format.