tao_rdfizer 0.9.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 5e3d5c4f2fea0db166fcd917ed89667233b10cd3
4
+ data.tar.gz: 0216a19ec23514d036319e1f9125694d50dfb0e9
5
+ SHA512:
6
+ metadata.gz: 028c6d291dfd986350c641f5265b014231876fa12d7b828df2b468a8fd1b995798ab2fedc2a493482c27aea420293bd8a8c47aa0521cdbd871f539d0483341a0
7
+ data.tar.gz: 3bd1f003d591cb8c849db4751ac168176020b395bdd3e4a3712faf83e204020ab1762e74391bfa1c9f0ed2ae6b590a202284ee972d47c52bf133a7a9ff645b0a
data/bin/tao_rdfizer ADDED
@@ -0,0 +1,14 @@
1
+ #!/usr/bin/env ruby
2
+ require 'tao_rdfizer'
3
+ require 'json'
4
+
5
# Command-line entry point: reads a PubAnnotation JSON file whose path is
# given as the first argument, converts it to TAO span descriptions, and
# prints the resulting Turtle to standard output.
#
# Failures (missing argument, unreadable file, malformed JSON, invalid
# annotations) are reported on standard error with a non-zero exit status so
# that error text is never mixed into the Turtle written to standard output.
begin
  path = ARGV[0]
  raise ArgumentError, 'Usage: tao_rdfizer <annotations.json>' if path.nil?

  annotations = JSON.parse(File.read(path), :symbolize_names => true)
  # A single annotation object is treated as a one-element collection.
  annotations = [annotations] unless annotations.is_a?(Array)
  # mode = :annotations
  mode = :spans
  rdfizer = TAO::RDFizer.new(mode)
  puts rdfizer.rdfize(annotations)
rescue ArgumentError, IOError, SystemCallError, JSON::ParserError => e
  # SystemCallError covers Errno::ENOENT / Errno::EACCES raised by File.read.
  warn e.message
  exit 1
end
@@ -0,0 +1 @@
1
+ require 'tao_rdfizer/tao_rdfizer'
@@ -0,0 +1,183 @@
1
+ #!/usr/bin/env ruby
2
+ require 'erb'
3
+
4
module TAO; end unless defined? TAO

# Generates Turtle (RDF) text from annotations given as hashes with symbol
# keys (:text, :target, :project, :namespaces, :denotations, :relations,
# :tracks), by rendering ERB templates against the preprocessed annotations.
class TAO::RDFizer
  # Directory holding the ERB templates, resolved from this file's location
  # (<gem root>/lib/tao_rdfizer/tao_rdfizer.rb -> <gem root>/view).
  VIEW_DIR = File.expand_path('../../../view', __FILE__)

  # Template used for each explicitly supported mode.
  TEMPLATE_FILES = {
    :annotations => 'tao_annotations_ttl.erb',
    :spans => 'tao_spans_ttl.erb'
  }.freeze

  # Template used when mode is nil (or any other value).
  DEFAULT_TEMPLATE_FILE = 'tao_ttl.erb'

  # Template rendering the @prefix / @base declarations.
  PREFIX_TEMPLATE_FILE = 'prefixes_ttl.erb'

  # if mode == :spans then produces span descriptions
  # if mode == :annotations then produces annotation descriptions
  # if mode == nil then produces both
  #
  # Raises a SystemCallError (e.g. Errno::ENOENT) when a template file cannot
  # be read.
  def initialize(mode = nil)
    @mode = mode
    @tao_ttl_erb = load_template(TEMPLATE_FILES.fetch(mode, DEFAULT_TEMPLATE_FILE))
    @prefix_ttl_erb = load_template(PREFIX_TEMPLATE_FILE)
  end

  # Renders a collection of annotation hashes as one Turtle document.
  #
  # annotations_col - Array of annotation hashes. Namespaces and the project
  #                   name are taken from the first element only. The hashes
  #                   (denotations, relations and their spans) are augmented
  #                   in place with the computed *_uri and index entries.
  #
  # Returns the rendered prefix section followed by the rendered body.
  # Raises ArgumentError when the collection is empty, when the reserved
  # prefix 'prj' is declared, or when :project is missing in a mode that
  # needs it (every mode except :spans).
  def rdfize(annotations_col)
    anns = annotations_col.first
    raise ArgumentError, 'No annotations were given.' if anns.nil?

    # namespaces
    namespaces = {}
    (anns[:namespaces] || []).each { |n| namespaces[n[:prefix]] = n[:uri] }
    raise ArgumentError, "'prj' is a reserved prefix." if namespaces.key?('prj')

    project_uri = nil
    unless @mode == :spans
      raise ArgumentError, "The 'project' of the annotations is missing." if anns[:project].nil?
      project_uri = 'http://pubannotation.org/projects/' + anns[:project].to_s
      namespaces['prj'] = project_uri + '/'
    end

    # NOTE: the templates are evaluated against this method's binding, so the
    # local names namespaces, denotations, relations and spans must be kept.
    denotations = []
    relations = []
    spans = []

    annotations_col.each do |annotations|
      text = annotations[:text]
      text_uri = annotations[:target]
      sourcedb, sourceid, divid = get_target_info(text_uri)
      text_id = divid.nil? ? "#{sourcedb}-#{sourceid}" : "#{sourcedb}-#{sourceid}-#{divid}"

      # denotations and relations
      _denotations = annotations[:denotations] || []
      _relations = annotations[:relations] || []
      if @mode == :spans && annotations.key?(:tracks)
        (annotations[:tracks] || []).each do |track|
          # A track may lack either list; treat a missing list as empty.
          _denotations += track[:denotations] || []
          _relations += track[:relations] || []
        end
      end

      # denotations preprocessing
      _denotations.each do |d|
        d[:span_uri] = "<#{text_uri}/spans/#{d[:span][:begin]}-#{d[:span][:end]}>"
        d[:obj_uri] = "prj:#{text_id}-#{d[:id]}"
        d[:cls_uri] = find_uri(d[:obj], namespaces)
      end

      # relations preprocessing
      _relations.each do |r|
        r[:subj_uri] = "prj:#{text_id}-#{r[:subj]}"
        r[:obj_uri] = "prj:#{text_id}-#{r[:obj]}"
        r[:pred_uri] = find_uri(r[:pred], namespaces)
      end

      unless @mode == :annotations
        _spans = collect_spans(text, text_uri, _denotations)
        index_spans(_spans, text)
        spans.concat(_spans)
      end

      denotations.concat(_denotations)
      relations.concat(_relations)
    end

    @prefix_ttl_erb.result(binding) + @tao_ttl_erb.result(binding)
  end

  # Returns true when any span in +spans+ covers +span+, i.e. begins at or
  # before its beginning and ends at or after its end; false otherwise.
  def include_parent?(spans, span)
    spans.any? { |s| s[:begin] <= span[:begin] && s[:end] >= span[:end] }
  end

  # Extracts the path segments following "/sourcedb/", "/sourceid/" and
  # "/divs/" from a target URI.
  #
  # Returns [sourcedb, sourceid, divid]; each element is nil when the
  # corresponding segment is absent (or when text_uri itself is nil).
  def get_target_info(text_uri)
    uri = text_uri.to_s
    sourcedb = uri[%r{/sourcedb/([^/]+)}, 1]
    sourceid = uri[%r{/sourceid/([^/]+)}, 1]
    divid = uri[%r{/divs/([^/]+)}, 1]

    return sourcedb, sourceid, divid
  end

  # Turns a label into the term written to the Turtle output.
  #
  # * "prefix:local" with a declared prefix is returned unchanged.
  # * A label starting with http:// or https:// is wrapped in <...>.
  # * Anything else is stripped of leading/trailing non-word characters
  #   (or replaced by 'SYM' when it consists only of such characters) and
  #   emitted relative to '_base' when declared, otherwise under 'prj:'.
  def find_uri(label, namespaces)
    delimiter_position = label.index(':')
    if delimiter_position && namespaces.key?(label[0...delimiter_position])
      label
    elsif label =~ %r{\Ahttps?://}
      "<#{label}>"
    else
      # \A / \z anchor the whole label; ^ / $ would match per line.
      clabel = if label =~ /\A\W+\z/
                 'SYM'
               else
                 label.sub(/\A\W+/, '').sub(/\W+\z/, '')
               end
      namespaces.key?('_base') ? "<#{clabel}>" : "prj:#{clabel}"
    end
  end

  private

  # Reads a template, preferring the copy in VIEW_DIR and falling back to the
  # path relative to the current working directory ("view/<filename>").
  def load_template(filename)
    bundled = File.join(VIEW_DIR, filename)
    path = File.exist?(bundled) ? bundled : File.join('view', filename)
    source = File.read(path)

    # Newer ERB takes the trim mode as a keyword; older ERB only positionally.
    if ERB.instance_method(:initialize).parameters.include?([:key, :trim_mode])
      ERB.new(source, :trim_mode => '-')
    else
      ERB.new(source, nil, '-')
    end
  end

  # Gathers the spans of the denotations plus the token spans of the text,
  # removes duplicates, decorates each span with :span_uri, :source_uri and
  # :text, attaches the denotations to their spans, and sorts the spans by
  # beginning (ascending) and then by end (descending).
  def collect_spans(text, text_uri, denotations)
    spans = denotations.map { |d| d[:span] }
    spans.concat(token_spans(text))
    spans.uniq!

    # add information
    spans.each do |s|
      s[:span_uri] = "<#{text_uri}/spans/#{s[:begin]}-#{s[:end]}>"
      s[:source_uri] = text_uri
      s[:text] = text[s[:begin]...s[:end]]
    end

    # index
    by_uri = spans.each_with_object({}) { |s, index| index[s[:span_uri]] = s }

    # add denotation information
    denotations.each { |d| (by_uri[d[:span_uri]][:denotations] ||= []) << d }

    spans.sort! { |a, b| (a[:begin] <=> b[:begin]).nonzero? || (b[:end] <=> a[:end]) }
    spans
  end

  # Splits the text into tokens, each a run of word characters terminated by
  # one non-word character, and returns the span of the word-character run of
  # every token.
  #
  # NOTE(review): a non-word character not preceded by a word character
  # yields a zero-length span; that behavior is kept as it was.
  def token_spans(text)
    spans = []
    position = 0
    text.scan(/\w*\W/) do |tok|
      # The token ends with exactly one non-word character, excluded here.
      spans << {:begin => position, :end => position + tok.length - 1}
      position += tok.length
    end
    # A final run of word characters has no terminator and is not matched by
    # the scan above; without this line the last word would get no span.
    spans << {:begin => position, :end => text.length} if position < text.length
    spans
  end

  # Fills :children / :parent and :followings / :precedings of the spans.
  # The spans must already be sorted as done by collect_spans.
  def index_spans(spans, text)
    # initialize the index
    spans.each do |s|
      s[:followings] = []
      s[:precedings] = []
      s[:children] = []
    end

    len = text.length
    num = spans.length

    spans.each_with_index do |span, i|
      # index the embedded spans
      j = i + 1
      while j < num && spans[j][:begin] < span[:end]
        # Skip spans already covered by a previously registered child.
        unless include_parent?(span[:children], spans[j])
          span[:children] << spans[j]
          spans[j][:parent] = span
        end
        j += 1
      end

      # find the following position (skipping whitespace after the span)
      fp = span[:end]
      fp += 1 while fp < len && text[fp] =~ /\s/
      next if fp == len

      # index the following spans
      while j < num && spans[j][:begin] == fp
        span[:followings] << spans[j]
        spans[j][:precedings] << span
        j += 1
      end
    end
  end
end
@@ -0,0 +1,10 @@
1
+ @prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
2
+ @prefix tao: <http://pubannotation.org/ontology/tao.owl#> .
3
+ <%# namespaces -%>
4
+ <% namespaces.each_key do |p| -%>
5
+ <% if p == '_base' -%>
6
+ @base <%= "<#{namespaces[p]}>" %> .
7
+ <% else -%>
8
+ @prefix <%= p %>: <%= "<#{namespaces[p]}>" %> .
9
+ <% end -%>
10
+ <% end -%>
@@ -0,0 +1,9 @@
1
+ <%# denotations -%>
2
+ <% denotations.each do |d| -%>
3
+ <%= d[:obj_uri] %> tao:denoted_by <%= d[:span_uri] %> ;
4
+ rdf:type <%= d[:cls_uri] %> .
5
+ <% end -%>
6
+ <%# relations -%>
7
+ <% relations.each do |r| -%>
8
+ <%= r[:subj_uri] %> <%= r[:pred_uri] %> <%= r[:obj_uri] %> .
9
+ <% end -%>
@@ -0,0 +1,14 @@
1
+ <%# spans -%>
2
+ <% spans.each do |s| -%>
3
+ <%= s[:span_uri] %> rdf:type tao:Text_span ;
4
+ tao:belongs_to <%= "<#{s[:source_uri]}>" %> ;
5
+ tao:begins_at <%= s[:begin] %> ;
6
+ tao:ends_at <%= s[:end] %> ;
7
+ <% s[:precedings].each do |s| -%>
8
+ tao:follows <%= s[:span_uri] %> ;
9
+ <% end -%>
10
+ <% s[:children].each do |s| -%>
11
+ tao:contains <%= s[:span_uri] %> ;
12
+ <% end -%>
13
+ tao:has_text "<%= s[:text] %>" .
14
+ <% end -%>
metadata ADDED
@@ -0,0 +1,51 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: tao_rdfizer
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.9.1
5
+ platform: ruby
6
+ authors:
7
+ - Jin-Dong Kim
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2017-09-26 00:00:00.000000000 Z
12
+ dependencies: []
13
+ description: It uses TAO (text annotation ontology) for representation of annotations
14
+ to text.
15
+ email: jindong.kim@gmail.com
16
+ executables:
17
+ - tao_rdfizer
18
+ extensions: []
19
+ extra_rdoc_files: []
20
+ files:
21
+ - bin/tao_rdfizer
22
+ - lib/tao_rdfizer.rb
23
+ - lib/tao_rdfizer/tao_rdfizer.rb
24
+ - view/prefixes_ttl.erb
25
+ - view/tao_annotations_ttl.erb
26
+ - view/tao_spans_ttl.erb
27
+ homepage: https://github.com/pubannotation/tao_rdfizer
28
+ licenses:
29
+ - MIT
30
+ metadata: {}
31
+ post_install_message:
32
+ rdoc_options: []
33
+ require_paths:
34
+ - lib
35
+ required_ruby_version: !ruby/object:Gem::Requirement
36
+ requirements:
37
+ - - ">="
38
+ - !ruby/object:Gem::Version
39
+ version: '0'
40
+ required_rubygems_version: !ruby/object:Gem::Requirement
41
+ requirements:
42
+ - - ">="
43
+ - !ruby/object:Gem::Version
44
+ version: '0'
45
+ requirements: []
46
+ rubyforge_project:
47
+ rubygems_version: 2.4.8
48
+ signing_key:
49
+ specification_version: 4
50
+ summary: A RDF statement generator for annotations in the PubAnnotation JSON format.
51
+ test_files: []