tao_rdfizer 0.9.12 → 0.11.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 7478d76882777c4b2eb12aaa55c293cac0de4db6fd0afc25db7d0ffb744b805f
4
- data.tar.gz: e488ef63144750962cbe746fe1315586575fb0e90797ad7f2adfd0c8511eeaf4
3
+ metadata.gz: ce27142842502109c882dc01222a5aa8e402f21c9305bcd5797a712dde5a0f8b
4
+ data.tar.gz: 8b542b1d3d4992e1129fa95cde633967bb55e805d7ed69fc36cb95940215004f
5
5
  SHA512:
6
- metadata.gz: 409af43001ef308fb60b88e52a19d0bb4ec24cfaf9c6dcd99cc557c556b1d386569431aadc326384182c3e154f27d32d7f516e819648edd99339ffb62f2d9455
7
- data.tar.gz: f7ab042f93804550904b805542bae7ced0fe3a6be956632ae2253848fee06e876b09e23f7f1068e8a641854b574d7086cbb7fdf8d2a7af46fdddc0429db395f6
6
+ metadata.gz: 65aecd55bf6fbd609565800231c8f87cad533bc25422912927ef9293e4e7e03bcab6bb5fd639320bf184cd7f0b0aa5de9c5a880734e768f6830761b5eaa6afec
7
+ data.tar.gz: ef195ea7ddae0dfa1409659066e5700369659d08fd40390c85f3384c3476ca44defd9c8eee602d6ebb5ef0dc75d63894a76fc5f597d79e57f1bf28598ecd20f5
data/bin/tao_rdfizer CHANGED
@@ -3,6 +3,7 @@ require 'tao_rdfizer'
3
3
  require 'json'
4
4
 
5
5
  mode = nil
6
+ options = {}
6
7
 
7
8
  ## command line option processing
8
9
  require 'optparse'
@@ -17,6 +18,14 @@ optparse = OptionParser.new do |opts|
17
18
  mode = :spans
18
19
  end
19
20
 
21
+ opts.on('-x', '--x-prefixes', 'without prefixes.') do
22
+ options[:with_prefixes] = false
23
+ end
24
+
25
+ opts.on('-o', '--only-prefixes', 'only prefixes.') do
26
+ options[:only_prefixes] = true
27
+ end
28
+
20
29
  opts.on('-h', '--help', 'displays this screen.') do
21
30
  puts opts
22
31
  exit
@@ -34,7 +43,7 @@ begin
34
43
  annotations = JSON.parse File.read(ARGV[0]), :symbolize_names => true
35
44
  annotations = [annotations] unless annotations.class == Array
36
45
  rdfizer = TAO::RDFizer.new(mode)
37
- puts rdfizer.rdfize(annotations)
46
+ puts rdfizer.rdfize(annotations, options)
38
47
  rescue ArgumentError, IOError => e
39
48
  puts e.message
40
49
  end
@@ -14,19 +14,47 @@ class TAO::RDFizer
14
14
  else
15
15
  ERB_ANNOTATIONS_TTL
16
16
  end
17
-
18
17
  @tao_ttl_erb = ERB.new(template, nil, '-')
19
18
  @prefix_ttl_erb = ERB.new(ERB_PREFIXES_TTL, nil, '-')
20
19
  end
21
20
 
22
- def rdfize(annotations_col)
21
+ def rdfize(annotations_col, options = nil)
22
+ options ||= {}
23
+ only_prefixes = options.has_key?(:only_prefixes) ? options[:only_prefixes] == true : false
24
+ with_prefixes = options.has_key?(:with_prefixes) ? options[:with_prefixes] == true : true
25
+
26
+ # check the format
27
+ annotations_col.each do |annotations|
28
+ raise "'target' is missing" unless annotations.has_key? :target
29
+ end
30
+
23
31
  # namespaces
24
32
  namespaces = {}
25
33
 
26
34
  anns = annotations_col.first
27
35
  anns[:namespaces].each {|n| namespaces[n[:prefix]] = n[:uri]} unless anns[:namespaces].nil?
28
36
 
37
+ prefixes_ttl = @prefix_ttl_erb.result_with_hash(namespaces:namespaces) if only_prefixes || with_prefixes
38
+
39
+ if only_prefixes
40
+ prefixes_ttl
41
+ else
42
+ annotations_ttl = get_annotations_ttl(annotations_col, namespaces)
43
+ if with_prefixes
44
+ prefixes_ttl + annotations_ttl
45
+ else
46
+ annotations_ttl
47
+ end
48
+ end
49
+ end
50
+
51
+ private
52
+
53
+ def get_annotations_ttl(annotations_col, namespaces)
54
+ anns = annotations_col.first
55
+
29
56
  unless @mode ==:spans
57
+ raise ArgumentError, "A project name has to be specified." unless anns.has_key?(:project)
30
58
  prefix_for_this = anns[:project].downcase.gsub(/ /, '_')
31
59
  raise ArgumentError, "'#{prefix_for_this}' is a reserved prefix for this project." if namespaces.has_key?(prefix_for_this)
32
60
  project_uri = 'http://pubannotation.org/projects/' + anns[:project]
@@ -34,6 +62,7 @@ class TAO::RDFizer
34
62
  end
35
63
 
36
64
  denotations = []
65
+ attributes = []
37
66
  relations = []
38
67
  spans = []
39
68
 
@@ -46,34 +75,57 @@ class TAO::RDFizer
46
75
  end
47
76
 
48
77
  # denotations and relations
49
- _denotations = annotations[:denotations]
50
- _relations = annotations[:relations]
51
- _denotations = [] if _denotations.nil?
52
- _relations = [] if _relations.nil?
78
+ _denotations = annotations[:denotations] || []
79
+ _attributes = annotations[:attributes] || []
80
+ _relations = annotations[:relations] || []
53
81
  if @mode == :spans && annotations.has_key?(:tracks)
54
82
  annotations[:tracks].each do |track|
55
83
  _denotations += track[:denotations]
84
+ _attributes += track[:attributes]
56
85
  _relations += track[:relations]
57
86
  end
58
87
  end
59
88
 
60
89
  begin
61
- # denotations preprocessing
62
- _denotations.each do |d|
63
- span_uri = "<#{text_uri}/spans/#{d[:span][:begin]}-#{d[:span][:end]}>"
64
- d[:span_uri] = span_uri
65
- d[:obj_uri] = "#{prefix_for_this}:#{text_id}-#{d[:id]}"
66
- d[:cls_uri] = find_uri(d[:obj], namespaces, prefix_for_this)
67
- end
90
+ unless @mode == :span
91
+ # index attributes
92
+ attributesh = _attributes.inject({}) do |h, a|
93
+ if a[:pred].end_with?('_id')
94
+ subj = a[:subj]
95
+ h[subj] = [] unless h.has_key? subj
96
+ h[subj] << a[:obj]
97
+ end
98
+ h
99
+ end
68
100
 
69
- # relations preprocessing
70
- _relations.each do |r|
71
- r[:subj_uri] = "#{prefix_for_this}:#{text_id}-#{r[:subj]}"
72
- r[:obj_uri] = "#{prefix_for_this}:#{text_id}-#{r[:obj]}"
73
- r[:pred_uri] = find_uri(r[:pred], namespaces, prefix_for_this)
101
+ # denotations preprocessing
102
+ _denotations.each do |d|
103
+ span_uri = "<#{text_uri}/spans/#{d[:span][:begin]}-#{d[:span][:end]}>"
104
+ d[:span_uri] = span_uri
105
+ d[:obj_uri] = "#{prefix_for_this}:#{text_id}-#{d[:id]}"
106
+ class_uris = (attributesh[d[:id]] || []).push(d[:obj])
107
+ d[:class_uris] = class_uris.map{|uri| find_uri(uri, namespaces, prefix_for_this)}
108
+ rescue ArgumentError => e
109
+ raise ArgumentError, "[#{sourcedb}-#{sourceid}-#{d[:id]}] " + e.message
110
+ end
111
+
112
+ # relations preprocessing
113
+ _relations.each do |r|
114
+ r[:subj_uri] = "#{prefix_for_this}:#{text_id}-#{r[:subj]}"
115
+ r[:obj_uri] = "#{prefix_for_this}:#{text_id}-#{r[:obj]}"
116
+ r[:pred_uri] = find_uri(r[:pred], namespaces, prefix_for_this)
117
+ rescue ArgumentError => e
118
+ raise ArgumentError, "[#{sourcedb}-#{sourceid}-#{r[:id]}] " + e.message
119
+ end
120
+ else
121
+ # denotations preprocessing
122
+ _denotations.each do |d|
123
+ span_uri = "<#{text_uri}/spans/#{d[:span][:begin]}-#{d[:span][:end]}>"
124
+ d[:span_uri] = span_uri
125
+ rescue ArgumentError => e
126
+ raise ArgumentError, "[#{sourcedb}-#{sourceid}-#{d[:id]}] " + e.message
127
+ end
74
128
  end
75
- rescue ArgumentError => e
76
- raise ArgumentError, "[#{sourcedb}-#{sourceid}] " + e
77
129
  end
78
130
 
79
131
  unless @mode == :annotations
@@ -88,7 +140,7 @@ class TAO::RDFizer
88
140
  s[:text] = text[s[:begin] ... s[:end]]
89
141
  end
90
142
 
91
- # index
143
+ # index spans
92
144
  spanh = _spans.inject({}){|r, s| r[s[:span_uri]] = s; r}
93
145
 
94
146
  # add denotation information
@@ -142,11 +194,9 @@ class TAO::RDFizer
142
194
  spans += _spans unless @mode == :annotations
143
195
  end
144
196
 
145
- ttl = @prefix_ttl_erb.result(binding) + @tao_ttl_erb.result(binding)
197
+ @tao_ttl_erb.result(binding)
146
198
  end
147
199
 
148
- private
149
-
150
200
  def include_parent?(spans, span)
151
201
  # spans.each{|s| return true if (s[:begin] <= span[:begin] && s[:end] > span[:end]) || (s[:begin] < span[:begin] && s[:end] >= span[:end])}
152
202
  spans.each{|s| return true if s[:begin] <= span[:begin] && s[:end] >= span[:end]}
@@ -178,10 +228,11 @@ class TAO::RDFizer
178
228
  end
179
229
  end
180
230
 
231
+ # variable: denotations, relations
181
232
  ERB_ANNOTATIONS_TTL = <<~HEREDOC
182
233
  <% denotations.each do |d| -%>
183
234
  <%= d[:obj_uri] %> tao:denoted_by <%= d[:span_uri] %> ;
184
- rdf:type <%= d[:cls_uri] %> .
235
+ <%= d[:class_uris].map{|c| "\trdf:type " + c}.join(" ;\n") + " ." %>
185
236
  <% end -%>
186
237
  <%# relations -%>
187
238
  <% relations.each do |r| -%>
@@ -189,6 +240,7 @@ class TAO::RDFizer
189
240
  <% end -%>
190
241
  HEREDOC
191
242
 
243
+ # variable: spans
192
244
  ERB_SPANS_TTL = <<~HEREDOC
193
245
  <% spans.each do |s| -%>
194
246
  <%= s[:span_uri] %> rdf:type tao:Text_span ;
@@ -205,6 +257,7 @@ class TAO::RDFizer
205
257
  <% end -%>
206
258
  HEREDOC
207
259
 
260
+ # variable: namespaces
208
261
  ERB_PREFIXES_TTL = <<~HEREDOC
209
262
  @prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
210
263
  @prefix tao: <http://pubannotation.org/ontology/tao.owl#> .
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: tao_rdfizer
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.9.12
4
+ version: 0.11.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Jin-Dong Kim
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2019-07-17 00:00:00.000000000 Z
11
+ date: 2021-05-20 00:00:00.000000000 Z
12
12
  dependencies: []
13
13
  description: It uses TAO (text annotation ontology) for representation of annotations
14
14
  to text.
@@ -40,8 +40,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
40
40
  - !ruby/object:Gem::Version
41
41
  version: '0'
42
42
  requirements: []
43
- rubyforge_project:
44
- rubygems_version: 2.7.9
43
+ rubygems_version: 3.0.9
45
44
  signing_key:
46
45
  specification_version: 4
47
46
  summary: A RDF statement generator for annotations in the PubAnnotation JSON format.