tao_rdfizer 0.9.11 → 0.10.2

Sign up to get free protection for your applications and to get access to all the features.
Files changed (3) hide show
  1. checksums.yaml +4 -4
  2. data/lib/tao_rdfizer/tao_rdfizer.rb +55 -23
  3. metadata +3 -4
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: ed83a71419d4ec9e1ec001fef079eaee936e70cc569dcfb1b99ba6ad17393417
4
- data.tar.gz: 050befcd8c60632103b911de00cd9413c27ae3ca33aef5ee24b1409ea6309dde
3
+ metadata.gz: 112cb7c3d78d77ca2df491e26f05eeb52334ed291483bb6bfa586bf707304608
4
+ data.tar.gz: e4bbdf5b014fed6779dc79f3b786a2d9d066a2a4dd9a8a1e8a87dec466d908ae
5
5
  SHA512:
6
- metadata.gz: 42b5a8dbca0ed29b43263c0fc66a1c8857ac7edb2b85eb5e0c099e767142634b600cdfa2a5676bd485029b1f876c9117def7b35586a939a389cf19089a5c0148
7
- data.tar.gz: 997fb71df3bae6eca00fe8236e2a99a0a2a06819f12d8590c0c9cff42baefb26b1c145c3b93535e92f20de3edd1f8616c157dec0b55be9c20dec020db7373e0e
6
+ metadata.gz: a291bf21a29a638ec0c3b42b16ff06e7530a8d7245965fb0901f7b4452358d6ecc7876a89c8d8c244a89fb7f47276c9105fb9b301311c09e51b86a5c0e1576dd
7
+ data.tar.gz: ad6a621f7a38146ee0ca0eccaa1b2c709a315b2d39bd000c801c4249f7fee0ecf22600fef4ff8baeb6160cf155e4fa033034d4aff5fa1d0d8cf4e9b78d16a5c8
@@ -19,7 +19,12 @@ class TAO::RDFizer
19
19
  @prefix_ttl_erb = ERB.new(ERB_PREFIXES_TTL, nil, '-')
20
20
  end
21
21
 
22
- def rdfize(annotations_col)
22
+ def rdfize(annotations_col, with_prefix = true)
23
+ # check the format
24
+ annotations_col.each do |annotations|
25
+ raise "'target' is missing" unless annotations.has_key? :target
26
+ end
27
+
23
28
  # namespaces
24
29
  namespaces = {}
25
30
 
@@ -27,6 +32,7 @@ class TAO::RDFizer
27
32
  anns[:namespaces].each {|n| namespaces[n[:prefix]] = n[:uri]} unless anns[:namespaces].nil?
28
33
 
29
34
  unless @mode ==:spans
35
+ raise ArgumentError, "A project name has to be specified." unless anns.has_key?(:project)
30
36
  prefix_for_this = anns[:project].downcase.gsub(/ /, '_')
31
37
  raise ArgumentError, "'#{prefix_for_this}' is a reserved prefix for this project." if namespaces.has_key?(prefix_for_this)
32
38
  project_uri = 'http://pubannotation.org/projects/' + anns[:project]
@@ -34,6 +40,7 @@ class TAO::RDFizer
34
40
  end
35
41
 
36
42
  denotations = []
43
+ attributes = []
37
44
  relations = []
38
45
  spans = []
39
46
 
@@ -46,34 +53,57 @@ class TAO::RDFizer
46
53
  end
47
54
 
48
55
  # denotations and relations
49
- _denotations = annotations[:denotations]
50
- _relations = annotations[:relations]
51
- _denotations = [] if _denotations.nil?
52
- _relations = [] if _relations.nil?
56
+ _denotations = annotations[:denotations] || []
57
+ _attributes = annotations[:attributes] || []
58
+ _relations = annotations[:relations] || []
53
59
  if @mode == :spans && annotations.has_key?(:tracks)
54
60
  annotations[:tracks].each do |track|
55
61
  _denotations += track[:denotations]
62
+ _attributes += track[:attributes]
56
63
  _relations += track[:relations]
57
64
  end
58
65
  end
59
66
 
60
67
  begin
61
- # denotations preprocessing
62
- _denotations.each do |d|
63
- span_uri = "<#{text_uri}/spans/#{d[:span][:begin]}-#{d[:span][:end]}>"
64
- d[:span_uri] = span_uri
65
- d[:obj_uri] = "#{prefix_for_this}:#{text_id}-#{d[:id]}"
66
- d[:cls_uri] = find_uri(d[:obj], namespaces, prefix_for_this)
67
- end
68
+ if @mode == :annotations
69
+ # index attributes
70
+ attributesh = _attributes.inject({}) do |h, a|
71
+ if a[:pred].end_with?('_id')
72
+ subj = a[:subj]
73
+ h[subj] = [] unless h.has_key? subj
74
+ h[subj] << a[:obj]
75
+ end
76
+ h
77
+ end
78
+
79
+ # denotations preprocessing
80
+ _denotations.each do |d|
81
+ span_uri = "<#{text_uri}/spans/#{d[:span][:begin]}-#{d[:span][:end]}>"
82
+ d[:span_uri] = span_uri
83
+ d[:obj_uri] = "#{prefix_for_this}:#{text_id}-#{d[:id]}"
84
+ class_uris = (attributesh[d[:id]] || []).push(d[:obj])
85
+ d[:class_uris] = class_uris.map{|uri| find_uri(uri, namespaces, prefix_for_this)}
86
+ rescue ArgumentError => e
87
+ raise ArgumentError, "[#{sourcedb}-#{sourceid}-#{d[:id]}] " + e.message
88
+ end
68
89
 
69
- # relations preprocessing
70
- _relations.each do |r|
71
- r[:subj_uri] = "#{prefix_for_this}:#{text_id}-#{r[:subj]}"
72
- r[:obj_uri] = "#{prefix_for_this}:#{text_id}-#{r[:obj]}"
73
- r[:pred_uri] = find_uri(r[:pred], namespaces, prefix_for_this)
90
+ # relations preprocessing
91
+ _relations.each do |r|
92
+ r[:subj_uri] = "#{prefix_for_this}:#{text_id}-#{r[:subj]}"
93
+ r[:obj_uri] = "#{prefix_for_this}:#{text_id}-#{r[:obj]}"
94
+ r[:pred_uri] = find_uri(r[:pred], namespaces, prefix_for_this)
95
+ rescue ArgumentError => e
96
+ raise ArgumentError, "[#{sourcedb}-#{sourceid}-#{r[:id]}] " + e.message
97
+ end
98
+ else
99
+ # denotations preprocessing
100
+ _denotations.each do |d|
101
+ span_uri = "<#{text_uri}/spans/#{d[:span][:begin]}-#{d[:span][:end]}>"
102
+ d[:span_uri] = span_uri
103
+ rescue ArgumentError => e
104
+ raise ArgumentError, "[#{sourcedb}-#{sourceid}-#{d[:id]}] " + e.message
105
+ end
74
106
  end
75
- rescue ArgumentError => e
76
- raise ArgumentError, "[#{sourcedb}-#{sourceid}] " + e
77
107
  end
78
108
 
79
109
  unless @mode == :annotations
@@ -88,7 +118,7 @@ class TAO::RDFizer
88
118
  s[:text] = text[s[:begin] ... s[:end]]
89
119
  end
90
120
 
91
- # index
121
+ # index spans
92
122
  spanh = _spans.inject({}){|r, s| r[s[:span_uri]] = s; r}
93
123
 
94
124
  # add denotation information
@@ -142,7 +172,9 @@ class TAO::RDFizer
142
172
  spans += _spans unless @mode == :annotations
143
173
  end
144
174
 
145
- ttl = @prefix_ttl_erb.result(binding) + @tao_ttl_erb.result(binding)
175
+ ttl = ''
176
+ ttl += @prefix_ttl_erb.result(binding) if with_prefix
177
+ ttl += @tao_ttl_erb.result(binding)
146
178
  end
147
179
 
148
180
  private
@@ -172,7 +204,7 @@ class TAO::RDFizer
172
204
  clabel = if label.match(/^\W+$/)
173
205
  'SYM'
174
206
  else
175
- label.sub(/^\W+/, '').sub(/\W+$/, '').gsub(/ +/, '_')
207
+ label.sub(/^\W+/, '').sub(/[^a-zA-Z0-9_)]+$/, '').gsub(/ +/, '_')
176
208
  end
177
209
  namespaces.has_key?('_base') ? "<#{clabel}>" : "#{prefix_for_this}:#{clabel.gsub('(', '\(').gsub(')', '\)')}"
178
210
  end
@@ -181,7 +213,7 @@ class TAO::RDFizer
181
213
  ERB_ANNOTATIONS_TTL = <<~HEREDOC
182
214
  <% denotations.each do |d| -%>
183
215
  <%= d[:obj_uri] %> tao:denoted_by <%= d[:span_uri] %> ;
184
- rdf:type <%= d[:cls_uri] %> .
216
+ <%= d[:class_uris].map{|c| "\trdf:type " + c}.join(" ;\n") + " ." %>
185
217
  <% end -%>
186
218
  <%# relations -%>
187
219
  <% relations.each do |r| -%>
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: tao_rdfizer
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.9.11
4
+ version: 0.10.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Jin-Dong Kim
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2019-07-17 00:00:00.000000000 Z
11
+ date: 2021-05-15 00:00:00.000000000 Z
12
12
  dependencies: []
13
13
  description: It uses TAO (text annotation ontology) for representation of annotations
14
14
  to text.
@@ -40,8 +40,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
40
40
  - !ruby/object:Gem::Version
41
41
  version: '0'
42
42
  requirements: []
43
- rubyforge_project:
44
- rubygems_version: 2.7.9
43
+ rubygems_version: 3.0.9
45
44
  signing_key:
46
45
  specification_version: 4
47
46
  summary: A RDF statement generator for annotations in the PubAnnotation JSON format.