tao_rdfizer 0.9.13 → 0.11.1

This diff shows the changes between two publicly available versions of a package, each of which has been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: '00759ca80638a1d6f0d730af7c919ccf51c1828a614158987ffe4f5d99ff9b9a'
4
- data.tar.gz: bfb0c62f1e6e39ec615323d67edc36b16fa41ce0d7cb440e25a7f3d10013573b
3
+ metadata.gz: 95aaa9006671c09cfcf08d12876932c5783f3d784ad3855b4a3819ad79febf87
4
+ data.tar.gz: 6f3970db2a93208bd760c68651bc33a212d7e384396314deaebe1fdc744b20b0
5
5
  SHA512:
6
- metadata.gz: fe5fe5d6b21b68fe9a6e565641f8a9f1ed0c3ae10f714995a4a1f5282103246556b02570f5ac18d53d38d41f6dcbb04cb3c690382b2427f9b9e1d78f5055aacf
7
- data.tar.gz: 729279be8a26a908095195c90fb76900adc7f32b2cbbbd01ff7ef6e5115ec10c05c53fb8cd809dc7d3900d989c342301ddbdfea70eee6468fd8970f2aa69deca
6
+ metadata.gz: 6ffe933f1e7ecfc91b7e51d010fc84d42c5afabfc0cac59df717f264a45d337a4e67d5a2e56082cc00a67b9b20594156fd60fe51e700a4a78cef8c3d2c50fe20
7
+ data.tar.gz: efc71f7e1765dff987554031bbf4edcea8881cabf232ac5504b67fb33d4ac5b514b96f02410b2293b3a37b443508a4167786bd67cf696543802dcba150146299
data/bin/tao_rdfizer CHANGED
@@ -3,6 +3,7 @@ require 'tao_rdfizer'
3
3
  require 'json'
4
4
 
5
5
  mode = nil
6
+ options = {}
6
7
 
7
8
  ## command line option processing
8
9
  require 'optparse'
@@ -17,6 +18,14 @@ optparse = OptionParser.new do |opts|
17
18
  mode = :spans
18
19
  end
19
20
 
21
+ opts.on('-x', '--x-prefixes', 'without prefixes.') do
22
+ options[:with_prefixes] = false
23
+ end
24
+
25
+ opts.on('-o', '--only-prefixes', 'only prefixes.') do
26
+ options[:only_prefixes] = true
27
+ end
28
+
20
29
  opts.on('-h', '--help', 'displays this screen.') do
21
30
  puts opts
22
31
  exit
@@ -34,7 +43,7 @@ begin
34
43
  annotations = JSON.parse File.read(ARGV[0]), :symbolize_names => true
35
44
  annotations = [annotations] unless annotations.class == Array
36
45
  rdfizer = TAO::RDFizer.new(mode)
37
- puts rdfizer.rdfize(annotations)
46
+ puts rdfizer.rdfize(annotations, options)
38
47
  rescue ArgumentError, IOError => e
39
48
  puts e.message
40
49
  end
@@ -14,18 +14,45 @@ class TAO::RDFizer
14
14
  else
15
15
  ERB_ANNOTATIONS_TTL
16
16
  end
17
-
18
17
  @tao_ttl_erb = ERB.new(template, nil, '-')
19
18
  @prefix_ttl_erb = ERB.new(ERB_PREFIXES_TTL, nil, '-')
20
19
  end
21
20
 
22
- def rdfize(annotations_col)
21
+ def rdfize(annotations_col, options = nil)
22
+ options ||= {}
23
+ only_prefixes = options.has_key?(:only_prefixes) ? options[:only_prefixes] == true : false
24
+ with_prefixes = options.has_key?(:with_prefixes) ? options[:with_prefixes] == true : true
25
+
26
+ # check the format
27
+ annotations_col.each do |annotations|
28
+ raise "'target' is missing" unless annotations.has_key? :target
29
+ end
30
+
23
31
  # namespaces
24
32
  namespaces = {}
25
33
 
26
34
  anns = annotations_col.first
27
35
  anns[:namespaces].each {|n| namespaces[n[:prefix]] = n[:uri]} unless anns[:namespaces].nil?
28
36
 
37
+ prefixes_ttl = @prefix_ttl_erb.result_with_hash(namespaces:namespaces) if only_prefixes || with_prefixes
38
+
39
+ if only_prefixes
40
+ prefixes_ttl
41
+ else
42
+ annotations_ttl = get_annotations_ttl(annotations_col, namespaces)
43
+ if with_prefixes
44
+ prefixes_ttl + annotations_ttl
45
+ else
46
+ annotations_ttl
47
+ end
48
+ end
49
+ end
50
+
51
+ private
52
+
53
+ def get_annotations_ttl(annotations_col, namespaces)
54
+ anns = annotations_col.first
55
+
29
56
  unless @mode ==:spans
30
57
  raise ArgumentError, "A project name has to be specified." unless anns.has_key?(:project)
31
58
  prefix_for_this = anns[:project].downcase.gsub(/ /, '_')
@@ -35,6 +62,7 @@ class TAO::RDFizer
35
62
  end
36
63
 
37
64
  denotations = []
65
+ attributes = []
38
66
  relations = []
39
67
  spans = []
40
68
 
@@ -47,25 +75,36 @@ class TAO::RDFizer
47
75
  end
48
76
 
49
77
  # denotations and relations
50
- _denotations = annotations[:denotations]
51
- _relations = annotations[:relations]
52
- _denotations = [] if _denotations.nil?
53
- _relations = [] if _relations.nil?
78
+ _denotations = annotations[:denotations] || []
79
+ _attributes = annotations[:attributes] || []
80
+ _relations = annotations[:relations] || []
54
81
  if @mode == :spans && annotations.has_key?(:tracks)
55
82
  annotations[:tracks].each do |track|
56
- _denotations += track[:denotations]
57
- _relations += track[:relations]
83
+ _denotations += track[:denotations] if track.has_key? :denotations
84
+ _attributes += track[:attributes] if track.has_key? :attributes
85
+ _relations += track[:relations] if track.has_key? :relations
58
86
  end
59
87
  end
60
88
 
61
89
  begin
62
- if @mode == :annotations
90
+ unless @mode == :span
91
+ # index attributes
92
+ attributesh = _attributes.inject({}) do |h, a|
93
+ if a[:pred].end_with?('_id')
94
+ subj = a[:subj]
95
+ h[subj] = [] unless h.has_key? subj
96
+ h[subj] << a[:obj]
97
+ end
98
+ h
99
+ end
100
+
63
101
  # denotations preprocessing
64
102
  _denotations.each do |d|
65
103
  span_uri = "<#{text_uri}/spans/#{d[:span][:begin]}-#{d[:span][:end]}>"
66
104
  d[:span_uri] = span_uri
67
105
  d[:obj_uri] = "#{prefix_for_this}:#{text_id}-#{d[:id]}"
68
- d[:cls_uri] = find_uri(d[:obj], namespaces, prefix_for_this)
106
+ class_uris = (attributesh[d[:id]] || []).push(d[:obj])
107
+ d[:class_uris] = class_uris.map{|uri| find_uri(uri, namespaces, prefix_for_this)}
69
108
  rescue ArgumentError => e
70
109
  raise ArgumentError, "[#{sourcedb}-#{sourceid}-#{d[:id]}] " + e.message
71
110
  end
@@ -101,7 +140,7 @@ class TAO::RDFizer
101
140
  s[:text] = text[s[:begin] ... s[:end]]
102
141
  end
103
142
 
104
- # index
143
+ # index spans
105
144
  spanh = _spans.inject({}){|r, s| r[s[:span_uri]] = s; r}
106
145
 
107
146
  # add denotation information
@@ -155,11 +194,9 @@ class TAO::RDFizer
155
194
  spans += _spans unless @mode == :annotations
156
195
  end
157
196
 
158
- ttl = @prefix_ttl_erb.result(binding) + @tao_ttl_erb.result(binding)
197
+ @tao_ttl_erb.result(binding)
159
198
  end
160
199
 
161
- private
162
-
163
200
  def include_parent?(spans, span)
164
201
  # spans.each{|s| return true if (s[:begin] <= span[:begin] && s[:end] > span[:end]) || (s[:begin] < span[:begin] && s[:end] >= span[:end])}
165
202
  spans.each{|s| return true if s[:begin] <= span[:begin] && s[:end] >= span[:end]}
@@ -175,7 +212,10 @@ class TAO::RDFizer
175
212
  end
176
213
 
177
214
  def find_uri (label, namespaces, prefix_for_this)
178
- raise ArgumentError, "A label including a whitespace character found: #{label}." if label.match(/\s/)
215
+ if label.match(/\s/)
216
+ # raise ArgumentError, "A label including a whitespace character found: #{label}."
217
+ label.gsub(/\s/, '_')
218
+ end
179
219
  delimiter_position = label.index(':')
180
220
  if !delimiter_position.nil? && namespaces.keys.include?(label[0...delimiter_position])
181
221
  label.gsub('(', '\(').gsub(')', '\)') # brackets have to be escaped
@@ -191,10 +231,11 @@ class TAO::RDFizer
191
231
  end
192
232
  end
193
233
 
234
+ # variable: denotations, relations
194
235
  ERB_ANNOTATIONS_TTL = <<~HEREDOC
195
236
  <% denotations.each do |d| -%>
196
237
  <%= d[:obj_uri] %> tao:denoted_by <%= d[:span_uri] %> ;
197
- rdf:type <%= d[:cls_uri] %> .
238
+ <%= d[:class_uris].map{|c| "\trdf:type " + c}.join(" ;\n") + " ." %>
198
239
  <% end -%>
199
240
  <%# relations -%>
200
241
  <% relations.each do |r| -%>
@@ -202,6 +243,7 @@ class TAO::RDFizer
202
243
  <% end -%>
203
244
  HEREDOC
204
245
 
246
+ # variable: spans
205
247
  ERB_SPANS_TTL = <<~HEREDOC
206
248
  <% spans.each do |s| -%>
207
249
  <%= s[:span_uri] %> rdf:type tao:Text_span ;
@@ -218,6 +260,7 @@ class TAO::RDFizer
218
260
  <% end -%>
219
261
  HEREDOC
220
262
 
263
+ # variable: namespaces
221
264
  ERB_PREFIXES_TTL = <<~HEREDOC
222
265
  @prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
223
266
  @prefix tao: <http://pubannotation.org/ontology/tao.owl#> .
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: tao_rdfizer
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.9.13
4
+ version: 0.11.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Jin-Dong Kim
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2020-04-06 00:00:00.000000000 Z
11
+ date: 2021-05-23 00:00:00.000000000 Z
12
12
  dependencies: []
13
13
  description: It uses TAO (text annotation ontology) for representation of annotations
14
14
  to text.
@@ -40,8 +40,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
40
40
  - !ruby/object:Gem::Version
41
41
  version: '0'
42
42
  requirements: []
43
- rubyforge_project:
44
- rubygems_version: 2.7.9
43
+ rubygems_version: 3.0.9
45
44
  signing_key:
46
45
  specification_version: 4
47
46
  summary: A RDF statement generator for annotations in the PubAnnotation JSON format.