tao_rdfizer 0.9.11 → 0.10.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (3)
  1. checksums.yaml +4 -4
  2. data/lib/tao_rdfizer/tao_rdfizer.rb +55 -23
  3. metadata +3 -4
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: ed83a71419d4ec9e1ec001fef079eaee936e70cc569dcfb1b99ba6ad17393417
4
- data.tar.gz: 050befcd8c60632103b911de00cd9413c27ae3ca33aef5ee24b1409ea6309dde
3
+ metadata.gz: 112cb7c3d78d77ca2df491e26f05eeb52334ed291483bb6bfa586bf707304608
4
+ data.tar.gz: e4bbdf5b014fed6779dc79f3b786a2d9d066a2a4dd9a8a1e8a87dec466d908ae
5
5
  SHA512:
6
- metadata.gz: 42b5a8dbca0ed29b43263c0fc66a1c8857ac7edb2b85eb5e0c099e767142634b600cdfa2a5676bd485029b1f876c9117def7b35586a939a389cf19089a5c0148
7
- data.tar.gz: 997fb71df3bae6eca00fe8236e2a99a0a2a06819f12d8590c0c9cff42baefb26b1c145c3b93535e92f20de3edd1f8616c157dec0b55be9c20dec020db7373e0e
6
+ metadata.gz: a291bf21a29a638ec0c3b42b16ff06e7530a8d7245965fb0901f7b4452358d6ecc7876a89c8d8c244a89fb7f47276c9105fb9b301311c09e51b86a5c0e1576dd
7
+ data.tar.gz: ad6a621f7a38146ee0ca0eccaa1b2c709a315b2d39bd000c801c4249f7fee0ecf22600fef4ff8baeb6160cf155e4fa033034d4aff5fa1d0d8cf4e9b78d16a5c8
data/lib/tao_rdfizer/tao_rdfizer.rb CHANGED
@@ -19,7 +19,12 @@ class TAO::RDFizer
19
19
  @prefix_ttl_erb = ERB.new(ERB_PREFIXES_TTL, nil, '-')
20
20
  end
21
21
 
22
- def rdfize(annotations_col)
22
+ def rdfize(annotations_col, with_prefix = true)
23
+ # check the format
24
+ annotations_col.each do |annotations|
25
+ raise "'target' is missing" unless annotations.has_key? :target
26
+ end
27
+
23
28
  # namespaces
24
29
  namespaces = {}
25
30
 
@@ -27,6 +32,7 @@ class TAO::RDFizer
27
32
  anns[:namespaces].each {|n| namespaces[n[:prefix]] = n[:uri]} unless anns[:namespaces].nil?
28
33
 
29
34
  unless @mode ==:spans
35
+ raise ArgumentError, "A project name has to be specified." unless anns.has_key?(:project)
30
36
  prefix_for_this = anns[:project].downcase.gsub(/ /, '_')
31
37
  raise ArgumentError, "'#{prefix_for_this}' is a reserved prefix for this project." if namespaces.has_key?(prefix_for_this)
32
38
  project_uri = 'http://pubannotation.org/projects/' + anns[:project]
@@ -34,6 +40,7 @@ class TAO::RDFizer
34
40
  end
35
41
 
36
42
  denotations = []
43
+ attributes = []
37
44
  relations = []
38
45
  spans = []
39
46
 
@@ -46,34 +53,57 @@ class TAO::RDFizer
46
53
  end
47
54
 
48
55
  # denotations and relations
49
- _denotations = annotations[:denotations]
50
- _relations = annotations[:relations]
51
- _denotations = [] if _denotations.nil?
52
- _relations = [] if _relations.nil?
56
+ _denotations = annotations[:denotations] || []
57
+ _attributes = annotations[:attributes] || []
58
+ _relations = annotations[:relations] || []
53
59
  if @mode == :spans && annotations.has_key?(:tracks)
54
60
  annotations[:tracks].each do |track|
55
61
  _denotations += track[:denotations]
62
+ _attributes += track[:attributes]
56
63
  _relations += track[:relations]
57
64
  end
58
65
  end
59
66
 
60
67
  begin
61
- # denotations preprocessing
62
- _denotations.each do |d|
63
- span_uri = "<#{text_uri}/spans/#{d[:span][:begin]}-#{d[:span][:end]}>"
64
- d[:span_uri] = span_uri
65
- d[:obj_uri] = "#{prefix_for_this}:#{text_id}-#{d[:id]}"
66
- d[:cls_uri] = find_uri(d[:obj], namespaces, prefix_for_this)
67
- end
68
+ if @mode == :annotations
69
+ # index attributes
70
+ attributesh = _attributes.inject({}) do |h, a|
71
+ if a[:pred].end_with?('_id')
72
+ subj = a[:subj]
73
+ h[subj] = [] unless h.has_key? subj
74
+ h[subj] << a[:obj]
75
+ end
76
+ h
77
+ end
78
+
79
+ # denotations preprocessing
80
+ _denotations.each do |d|
81
+ span_uri = "<#{text_uri}/spans/#{d[:span][:begin]}-#{d[:span][:end]}>"
82
+ d[:span_uri] = span_uri
83
+ d[:obj_uri] = "#{prefix_for_this}:#{text_id}-#{d[:id]}"
84
+ class_uris = (attributesh[d[:id]] || []).push(d[:obj])
85
+ d[:class_uris] = class_uris.map{|uri| find_uri(uri, namespaces, prefix_for_this)}
86
+ rescue ArgumentError => e
87
+ raise ArgumentError, "[#{sourcedb}-#{sourceid}-#{d[:id]}] " + e.message
88
+ end
68
89
 
69
- # relations preprocessing
70
- _relations.each do |r|
71
- r[:subj_uri] = "#{prefix_for_this}:#{text_id}-#{r[:subj]}"
72
- r[:obj_uri] = "#{prefix_for_this}:#{text_id}-#{r[:obj]}"
73
- r[:pred_uri] = find_uri(r[:pred], namespaces, prefix_for_this)
90
+ # relations preprocessing
91
+ _relations.each do |r|
92
+ r[:subj_uri] = "#{prefix_for_this}:#{text_id}-#{r[:subj]}"
93
+ r[:obj_uri] = "#{prefix_for_this}:#{text_id}-#{r[:obj]}"
94
+ r[:pred_uri] = find_uri(r[:pred], namespaces, prefix_for_this)
95
+ rescue ArgumentError => e
96
+ raise ArgumentError, "[#{sourcedb}-#{sourceid}-#{r[:id]}] " + e.message
97
+ end
98
+ else
99
+ # denotations preprocessing
100
+ _denotations.each do |d|
101
+ span_uri = "<#{text_uri}/spans/#{d[:span][:begin]}-#{d[:span][:end]}>"
102
+ d[:span_uri] = span_uri
103
+ rescue ArgumentError => e
104
+ raise ArgumentError, "[#{sourcedb}-#{sourceid}-#{d[:id]}] " + e.message
105
+ end
74
106
  end
75
- rescue ArgumentError => e
76
- raise ArgumentError, "[#{sourcedb}-#{sourceid}] " + e
77
107
  end
78
108
 
79
109
  unless @mode == :annotations
@@ -88,7 +118,7 @@ class TAO::RDFizer
88
118
  s[:text] = text[s[:begin] ... s[:end]]
89
119
  end
90
120
 
91
- # index
121
+ # index spans
92
122
  spanh = _spans.inject({}){|r, s| r[s[:span_uri]] = s; r}
93
123
 
94
124
  # add denotation information
@@ -142,7 +172,9 @@ class TAO::RDFizer
142
172
  spans += _spans unless @mode == :annotations
143
173
  end
144
174
 
145
- ttl = @prefix_ttl_erb.result(binding) + @tao_ttl_erb.result(binding)
175
+ ttl = ''
176
+ ttl += @prefix_ttl_erb.result(binding) if with_prefix
177
+ ttl += @tao_ttl_erb.result(binding)
146
178
  end
147
179
 
148
180
  private
@@ -172,7 +204,7 @@ class TAO::RDFizer
172
204
  clabel = if label.match(/^\W+$/)
173
205
  'SYM'
174
206
  else
175
- label.sub(/^\W+/, '').sub(/\W+$/, '').gsub(/ +/, '_')
207
+ label.sub(/^\W+/, '').sub(/[^a-zA-Z0-9_)]+$/, '').gsub(/ +/, '_')
176
208
  end
177
209
  namespaces.has_key?('_base') ? "<#{clabel}>" : "#{prefix_for_this}:#{clabel.gsub('(', '\(').gsub(')', '\)')}"
178
210
  end
@@ -181,7 +213,7 @@ class TAO::RDFizer
181
213
  ERB_ANNOTATIONS_TTL = <<~HEREDOC
182
214
  <% denotations.each do |d| -%>
183
215
  <%= d[:obj_uri] %> tao:denoted_by <%= d[:span_uri] %> ;
184
- rdf:type <%= d[:cls_uri] %> .
216
+ <%= d[:class_uris].map{|c| "\trdf:type " + c}.join(" ;\n") + " ." %>
185
217
  <% end -%>
186
218
  <%# relations -%>
187
219
  <% relations.each do |r| -%>
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: tao_rdfizer
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.9.11
4
+ version: 0.10.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Jin-Dong Kim
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2019-07-17 00:00:00.000000000 Z
11
+ date: 2021-05-15 00:00:00.000000000 Z
12
12
  dependencies: []
13
13
  description: It uses TAO (text annotation ontology) for representation of annotations
14
14
  to text.
@@ -40,8 +40,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
40
40
  - !ruby/object:Gem::Version
41
41
  version: '0'
42
42
  requirements: []
43
- rubyforge_project:
44
- rubygems_version: 2.7.9
43
+ rubygems_version: 3.0.9
45
44
  signing_key:
46
45
  specification_version: 4
47
46
  summary: A RDF statement generator for annotations in the PubAnnotation JSON format.