tao_rdfizer 0.9.12 → 0.11.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 7478d76882777c4b2eb12aaa55c293cac0de4db6fd0afc25db7d0ffb744b805f
4
- data.tar.gz: e488ef63144750962cbe746fe1315586575fb0e90797ad7f2adfd0c8511eeaf4
3
+ metadata.gz: ce27142842502109c882dc01222a5aa8e402f21c9305bcd5797a712dde5a0f8b
4
+ data.tar.gz: 8b542b1d3d4992e1129fa95cde633967bb55e805d7ed69fc36cb95940215004f
5
5
  SHA512:
6
- metadata.gz: 409af43001ef308fb60b88e52a19d0bb4ec24cfaf9c6dcd99cc557c556b1d386569431aadc326384182c3e154f27d32d7f516e819648edd99339ffb62f2d9455
7
- data.tar.gz: f7ab042f93804550904b805542bae7ced0fe3a6be956632ae2253848fee06e876b09e23f7f1068e8a641854b574d7086cbb7fdf8d2a7af46fdddc0429db395f6
6
+ metadata.gz: 65aecd55bf6fbd609565800231c8f87cad533bc25422912927ef9293e4e7e03bcab6bb5fd639320bf184cd7f0b0aa5de9c5a880734e768f6830761b5eaa6afec
7
+ data.tar.gz: ef195ea7ddae0dfa1409659066e5700369659d08fd40390c85f3384c3476ca44defd9c8eee602d6ebb5ef0dc75d63894a76fc5f597d79e57f1bf28598ecd20f5
data/bin/tao_rdfizer CHANGED
@@ -3,6 +3,7 @@ require 'tao_rdfizer'
3
3
  require 'json'
4
4
 
5
5
  mode = nil
6
+ options = {}
6
7
 
7
8
  ## command line option processing
8
9
  require 'optparse'
@@ -17,6 +18,14 @@ optparse = OptionParser.new do |opts|
17
18
  mode = :spans
18
19
  end
19
20
 
21
+ opts.on('-x', '--x-prefixes', 'without prefixes.') do
22
+ options[:with_prefixes] = false
23
+ end
24
+
25
+ opts.on('-o', '--only-prefixes', 'only prefixes.') do
26
+ options[:only_prefixes] = true
27
+ end
28
+
20
29
  opts.on('-h', '--help', 'displays this screen.') do
21
30
  puts opts
22
31
  exit
@@ -34,7 +43,7 @@ begin
34
43
  annotations = JSON.parse File.read(ARGV[0]), :symbolize_names => true
35
44
  annotations = [annotations] unless annotations.class == Array
36
45
  rdfizer = TAO::RDFizer.new(mode)
37
- puts rdfizer.rdfize(annotations)
46
+ puts rdfizer.rdfize(annotations, options)
38
47
  rescue ArgumentError, IOError => e
39
48
  puts e.message
40
49
  end
@@ -14,19 +14,47 @@ class TAO::RDFizer
14
14
  else
15
15
  ERB_ANNOTATIONS_TTL
16
16
  end
17
-
18
17
  @tao_ttl_erb = ERB.new(template, nil, '-')
19
18
  @prefix_ttl_erb = ERB.new(ERB_PREFIXES_TTL, nil, '-')
20
19
  end
21
20
 
22
- def rdfize(annotations_col)
21
+ def rdfize(annotations_col, options = nil)
22
+ options ||= {}
23
+ only_prefixes = options.has_key?(:only_prefixes) ? options[:only_prefixes] == true : false
24
+ with_prefixes = options.has_key?(:with_prefixes) ? options[:with_prefixes] == true : true
25
+
26
+ # check the format
27
+ annotations_col.each do |annotations|
28
+ raise "'target' is missing" unless annotations.has_key? :target
29
+ end
30
+
23
31
  # namespaces
24
32
  namespaces = {}
25
33
 
26
34
  anns = annotations_col.first
27
35
  anns[:namespaces].each {|n| namespaces[n[:prefix]] = n[:uri]} unless anns[:namespaces].nil?
28
36
 
37
+ prefixes_ttl = @prefix_ttl_erb.result_with_hash(namespaces:namespaces) if only_prefixes || with_prefixes
38
+
39
+ if only_prefixes
40
+ prefixes_ttl
41
+ else
42
+ annotations_ttl = get_annotations_ttl(annotations_col, namespaces)
43
+ if with_prefixes
44
+ prefixes_ttl + annotations_ttl
45
+ else
46
+ annotations_ttl
47
+ end
48
+ end
49
+ end
50
+
51
+ private
52
+
53
+ def get_annotations_ttl(annotations_col, namespaces)
54
+ anns = annotations_col.first
55
+
29
56
  unless @mode ==:spans
57
+ raise ArgumentError, "A project name has to be specified." unless anns.has_key?(:project)
30
58
  prefix_for_this = anns[:project].downcase.gsub(/ /, '_')
31
59
  raise ArgumentError, "'#{prefix_for_this}' is a reserved prefix for this project." if namespaces.has_key?(prefix_for_this)
32
60
  project_uri = 'http://pubannotation.org/projects/' + anns[:project]
@@ -34,6 +62,7 @@ class TAO::RDFizer
34
62
  end
35
63
 
36
64
  denotations = []
65
+ attributes = []
37
66
  relations = []
38
67
  spans = []
39
68
 
@@ -46,34 +75,57 @@ class TAO::RDFizer
46
75
  end
47
76
 
48
77
  # denotations and relations
49
- _denotations = annotations[:denotations]
50
- _relations = annotations[:relations]
51
- _denotations = [] if _denotations.nil?
52
- _relations = [] if _relations.nil?
78
+ _denotations = annotations[:denotations] || []
79
+ _attributes = annotations[:attributes] || []
80
+ _relations = annotations[:relations] || []
53
81
  if @mode == :spans && annotations.has_key?(:tracks)
54
82
  annotations[:tracks].each do |track|
55
83
  _denotations += track[:denotations]
84
+ _attributes += track[:attributes]
56
85
  _relations += track[:relations]
57
86
  end
58
87
  end
59
88
 
60
89
  begin
61
- # denotations preprocessing
62
- _denotations.each do |d|
63
- span_uri = "<#{text_uri}/spans/#{d[:span][:begin]}-#{d[:span][:end]}>"
64
- d[:span_uri] = span_uri
65
- d[:obj_uri] = "#{prefix_for_this}:#{text_id}-#{d[:id]}"
66
- d[:cls_uri] = find_uri(d[:obj], namespaces, prefix_for_this)
67
- end
90
+ unless @mode == :span
91
+ # index attributes
92
+ attributesh = _attributes.inject({}) do |h, a|
93
+ if a[:pred].end_with?('_id')
94
+ subj = a[:subj]
95
+ h[subj] = [] unless h.has_key? subj
96
+ h[subj] << a[:obj]
97
+ end
98
+ h
99
+ end
68
100
 
69
- # relations preprocessing
70
- _relations.each do |r|
71
- r[:subj_uri] = "#{prefix_for_this}:#{text_id}-#{r[:subj]}"
72
- r[:obj_uri] = "#{prefix_for_this}:#{text_id}-#{r[:obj]}"
73
- r[:pred_uri] = find_uri(r[:pred], namespaces, prefix_for_this)
101
+ # denotations preprocessing
102
+ _denotations.each do |d|
103
+ span_uri = "<#{text_uri}/spans/#{d[:span][:begin]}-#{d[:span][:end]}>"
104
+ d[:span_uri] = span_uri
105
+ d[:obj_uri] = "#{prefix_for_this}:#{text_id}-#{d[:id]}"
106
+ class_uris = (attributesh[d[:id]] || []).push(d[:obj])
107
+ d[:class_uris] = class_uris.map{|uri| find_uri(uri, namespaces, prefix_for_this)}
108
+ rescue ArgumentError => e
109
+ raise ArgumentError, "[#{sourcedb}-#{sourceid}-#{d[:id]}] " + e.message
110
+ end
111
+
112
+ # relations preprocessing
113
+ _relations.each do |r|
114
+ r[:subj_uri] = "#{prefix_for_this}:#{text_id}-#{r[:subj]}"
115
+ r[:obj_uri] = "#{prefix_for_this}:#{text_id}-#{r[:obj]}"
116
+ r[:pred_uri] = find_uri(r[:pred], namespaces, prefix_for_this)
117
+ rescue ArgumentError => e
118
+ raise ArgumentError, "[#{sourcedb}-#{sourceid}-#{r[:id]}] " + e.message
119
+ end
120
+ else
121
+ # denotations preprocessing
122
+ _denotations.each do |d|
123
+ span_uri = "<#{text_uri}/spans/#{d[:span][:begin]}-#{d[:span][:end]}>"
124
+ d[:span_uri] = span_uri
125
+ rescue ArgumentError => e
126
+ raise ArgumentError, "[#{sourcedb}-#{sourceid}-#{d[:id]}] " + e.message
127
+ end
74
128
  end
75
- rescue ArgumentError => e
76
- raise ArgumentError, "[#{sourcedb}-#{sourceid}] " + e
77
129
  end
78
130
 
79
131
  unless @mode == :annotations
@@ -88,7 +140,7 @@ class TAO::RDFizer
88
140
  s[:text] = text[s[:begin] ... s[:end]]
89
141
  end
90
142
 
91
- # index
143
+ # index spans
92
144
  spanh = _spans.inject({}){|r, s| r[s[:span_uri]] = s; r}
93
145
 
94
146
  # add denotation information
@@ -142,11 +194,9 @@ class TAO::RDFizer
142
194
  spans += _spans unless @mode == :annotations
143
195
  end
144
196
 
145
- ttl = @prefix_ttl_erb.result(binding) + @tao_ttl_erb.result(binding)
197
+ @tao_ttl_erb.result(binding)
146
198
  end
147
199
 
148
- private
149
-
150
200
  def include_parent?(spans, span)
151
201
  # spans.each{|s| return true if (s[:begin] <= span[:begin] && s[:end] > span[:end]) || (s[:begin] < span[:begin] && s[:end] >= span[:end])}
152
202
  spans.each{|s| return true if s[:begin] <= span[:begin] && s[:end] >= span[:end]}
@@ -178,10 +228,11 @@ class TAO::RDFizer
178
228
  end
179
229
  end
180
230
 
231
+ # variable: denotations, relations
181
232
  ERB_ANNOTATIONS_TTL = <<~HEREDOC
182
233
  <% denotations.each do |d| -%>
183
234
  <%= d[:obj_uri] %> tao:denoted_by <%= d[:span_uri] %> ;
184
- rdf:type <%= d[:cls_uri] %> .
235
+ <%= d[:class_uris].map{|c| "\trdf:type " + c}.join(" ;\n") + " ." %>
185
236
  <% end -%>
186
237
  <%# relations -%>
187
238
  <% relations.each do |r| -%>
@@ -189,6 +240,7 @@ class TAO::RDFizer
189
240
  <% end -%>
190
241
  HEREDOC
191
242
 
243
+ # variable: spans
192
244
  ERB_SPANS_TTL = <<~HEREDOC
193
245
  <% spans.each do |s| -%>
194
246
  <%= s[:span_uri] %> rdf:type tao:Text_span ;
@@ -205,6 +257,7 @@ class TAO::RDFizer
205
257
  <% end -%>
206
258
  HEREDOC
207
259
 
260
+ # variable: namespaces
208
261
  ERB_PREFIXES_TTL = <<~HEREDOC
209
262
  @prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
210
263
  @prefix tao: <http://pubannotation.org/ontology/tao.owl#> .
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: tao_rdfizer
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.9.12
4
+ version: 0.11.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Jin-Dong Kim
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2019-07-17 00:00:00.000000000 Z
11
+ date: 2021-05-20 00:00:00.000000000 Z
12
12
  dependencies: []
13
13
  description: It uses TAO (text annotation ontology) for representation of annotations
14
14
  to text.
@@ -40,8 +40,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
40
40
  - !ruby/object:Gem::Version
41
41
  version: '0'
42
42
  requirements: []
43
- rubyforge_project:
44
- rubygems_version: 2.7.9
43
+ rubygems_version: 3.0.9
45
44
  signing_key:
46
45
  specification_version: 4
47
46
  summary: A RDF statement generator for annotations in the PubAnnotation JSON format.