tao_rdfizer 0.9.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 5e3d5c4f2fea0db166fcd917ed89667233b10cd3
4
+ data.tar.gz: 0216a19ec23514d036319e1f9125694d50dfb0e9
5
+ SHA512:
6
+ metadata.gz: 028c6d291dfd986350c641f5265b014231876fa12d7b828df2b468a8fd1b995798ab2fedc2a493482c27aea420293bd8a8c47aa0521cdbd871f539d0483341a0
7
+ data.tar.gz: 3bd1f003d591cb8c849db4751ac168176020b395bdd3e4a3712faf83e204020ab1762e74391bfa1c9f0ed2ae6b590a202284ee972d47c52bf133a7a9ff645b0a
data/bin/tao_rdfizer ADDED
@@ -0,0 +1,14 @@
1
#!/usr/bin/env ruby
require 'tao_rdfizer'
require 'json'

# Command-line entry point: reads the PubAnnotation JSON file named by the
# first argument and prints the generated statements to standard output.
#
# Failures are reported on standard error with a non-zero exit status so
# that they never end up mixed into redirected output.
begin
  path = ARGV[0]
  raise ArgumentError, 'usage: tao_rdfizer <annotations.json>' if path.nil?

  annotations = JSON.parse(File.read(path), :symbolize_names => true)
  # A single annotation object is accepted as well as an array of them.
  annotations = [annotations] unless annotations.is_a?(Array)
  # mode = :annotations
  mode = :spans
  rdfizer = TAO::RDFizer.new(mode)
  puts rdfizer.rdfize(annotations)
rescue ArgumentError, IOError, SystemCallError, JSON::ParserError => e
  # SystemCallError covers the Errno::* family (e.g. a missing or unreadable
  # file); JSON::ParserError covers malformed input.
  warn e.message
  exit 1
end
@@ -0,0 +1 @@
1
+ require 'tao_rdfizer/tao_rdfizer'
@@ -0,0 +1,183 @@
1
+ #!/usr/bin/env ruby
2
+ require 'erb'
3
+
4
module TAO; end unless defined? TAO

# Generates Turtle statements from annotations in the PubAnnotation JSON
# format (parsed with symbol keys), rendering them through ERB templates.
class TAO::RDFizer
  # Root directory of the gem: this file is shipped as
  # lib/tao_rdfizer/tao_rdfizer.rb, so the root is two levels above it.
  GEM_ROOT = File.expand_path('../..', __dir__)

  # Characters that may not appear verbatim inside a double-quoted Turtle
  # string, mapped to their escape sequences.
  TURTLE_ESCAPES = {
    '\\' => '\\\\',
    '"' => '\\"',
    "\n" => '\\n',
    "\r" => '\\r',
    "\t" => '\\t'
  }.freeze

  # if mode == :spans then produces span descriptions
  # if mode == :annotations then produces annotation descriptions
  # if mode == nil then produces both
  #
  # Any other mode value selects the same template as nil.
  #
  # NOTE(review): 'view/tao_ttl.erb' is not listed among the packaged files
  # of the gem, so the nil mode presumably fails with Errno::ENOENT unless
  # that template is supplied separately - confirm.
  def initialize(mode = nil)
    @mode = mode
    template_filename =
      case mode
      when :annotations then 'view/tao_annotations_ttl.erb'
      when :spans then 'view/tao_spans_ttl.erb'
      else 'view/tao_ttl.erb'
      end
    @tao_ttl_erb = build_erb(read_template(template_filename))
    @prefix_ttl_erb = build_erb(read_template('view/prefixes_ttl.erb'))
  end

  # Renders a collection of annotation documents.
  #
  # Namespaces and the project name are taken from the first document only.
  # Each document is a Hash with :text and :target and, optionally,
  # :denotations, :relations, :tracks, :namespaces and :project. The
  # denotation, relation and span hashes of the input are annotated in place
  # with the derived URIs and index information.
  #
  # Returns the output of the prefix template followed by the output of the
  # mode-specific template.
  #
  # Raises ArgumentError when the reserved prefix 'prj' is declared, or when
  # the project name is needed (every mode but :spans) and missing.
  def rdfize(annotations_col)
    anns = annotations_col.first || {}

    # namespaces
    namespaces = {}
    (anns[:namespaces] || []).each { |n| namespaces[n[:prefix]] = n[:uri] }
    raise ArgumentError, "'prj' is a reserved prefix." if namespaces.key?('prj')

    unless @mode == :spans
      raise ArgumentError, "'project' is required to build the 'prj' namespace." if anns[:project].nil?

      project_uri = "http://pubannotation.org/projects/#{anns[:project]}"
      namespaces['prj'] = project_uri + '/'
    end

    # These method-level locals are what the templates read through `binding`.
    denotations = []
    relations = []
    spans = []

    annotations_col.each do |annotations|
      text = annotations[:text]
      text_uri = annotations[:target]
      sourcedb, sourceid, divid = get_target_info(text_uri)
      text_id = divid.nil? ? "#{sourcedb}-#{sourceid}" : "#{sourcedb}-#{sourceid}-#{divid}"

      # denotations and relations
      doc_denotations = annotations[:denotations] || []
      doc_relations = annotations[:relations] || []
      if @mode == :spans
        # Track contents are merged in only for span output; a track may
        # lack either list.
        (annotations[:tracks] || []).each do |track|
          doc_denotations += (track[:denotations] || [])
          doc_relations += (track[:relations] || [])
        end
      end

      # denotations preprocessing
      doc_denotations.each do |d|
        d[:span_uri] = "<#{text_uri}/spans/#{d[:span][:begin]}-#{d[:span][:end]}>"
        d[:obj_uri] = "prj:#{text_id}-#{d[:id]}"
        d[:cls_uri] = find_uri(d[:obj], namespaces)
      end

      # relations preprocessing
      doc_relations.each do |r|
        r[:subj_uri] = "prj:#{text_id}-#{r[:subj]}"
        r[:obj_uri] = "prj:#{text_id}-#{r[:obj]}"
        r[:pred_uri] = find_uri(r[:pred], namespaces)
      end

      unless @mode == :annotations
        doc_spans = collect_spans(text, text_uri, doc_denotations)
        index_spans(text, doc_spans)
        spans += doc_spans
      end

      denotations += doc_denotations
      relations += doc_relations
    end

    @prefix_ttl_erb.result(binding) + @tao_ttl_erb.result(binding)
  end

  # Tells whether any span in +spans+ covers +span+ entirely (equal
  # boundaries count as covering).
  def include_parent?(spans, span)
    spans.any? { |s| s[:begin] <= span[:begin] && s[:end] >= span[:end] }
  end

  # Extracts the path segments that follow '/sourcedb/', '/sourceid/' and
  # '/divs/' in +text_uri+.
  #
  # Returns [sourcedb, sourceid, divid]; a segment that is absent is nil.
  def get_target_info(text_uri)
    uri = text_uri.to_s
    sourcedb = uri[%r|/sourcedb/([^/]+)|, 1]
    sourceid = uri[%r|/sourceid/([^/]+)|, 1]
    divid = uri[%r|/divs/([^/]+)|, 1]

    [sourcedb, sourceid, divid]
  end

  # Turns a class or predicate label into the term written to the output.
  #
  # * a label whose prefix (the part before the first ':') is a declared
  #   namespace is returned unchanged;
  # * a label starting with http:// or https:// is wrapped in angle brackets;
  # * anything else is stripped of leading and trailing non-word characters
  #   ('SYM' when nothing but non-word characters is present) and emitted as
  #   a relative IRI when a '_base' namespace is declared, or under the
  #   'prj' prefix otherwise.
  def find_uri(label, namespaces)
    delimiter_position = label.index(':')
    if delimiter_position && namespaces.key?(label[0...delimiter_position])
      label
    elsif label =~ %r{\Ahttps?://}
      "<#{label}>"
    else
      # \A and \z anchor the whole label rather than individual lines.
      clabel = label =~ /\A\W+\z/ ? 'SYM' : label.sub(/\A\W+/, '').sub(/\W+\z/, '')
      namespaces.key?('_base') ? "<#{clabel}>" : "prj:#{clabel}"
    end
  end

  private

  # Reads a template, preferring the copy under the gem root so that the
  # current working directory does not matter; falls back to the path as
  # given (relative to the working directory) when no such copy exists.
  def read_template(relative_path)
    bundled = File.join(GEM_ROOT, relative_path)
    File.read(File.exist?(bundled) ? bundled : relative_path)
  end

  # Builds an ERB object with the '-' trim mode, using the keyword form
  # where ERB offers it and the positional form otherwise.
  def build_erb(source)
    if ERB.instance_method(:initialize).parameters.include?([:key, :trim_mode])
      ERB.new(source, trim_mode: '-')
    else
      ERB.new(source, nil, '-')
    end
  end

  # Escapes +str+ for use between the double quotes of a Turtle string.
  def escape_turtle_string(str)
    str.gsub(/[\\"\n\r\t]/) { |c| TURTLE_ESCAPES[c] }
  end

  # Gathers the spans of one document: the spans of its denotations plus one
  # span per token found in the text, unique by offsets.
  #
  # Every span gets :span_uri, :source_uri and :text (already escaped for a
  # Turtle string), and :denotations on the spans that carry denotations.
  # Returns the spans ordered by start offset, longer spans first on ties.
  def collect_spans(text, text_uri, denotations)
    spans = denotations.map { |d| d[:span] }

    # A token is a (possibly empty) run of word characters terminated by one
    # non-word character; the terminator is not part of the span.
    position = 0
    text.scan(/[^\W]*\W/).each do |tok|
      spans << { :begin => position, :end => position + tok.index(/\W/) }
      position += tok.length
    end
    spans.uniq! { |s| [s[:begin], s[:end]] }

    # add information
    spans.each do |s|
      fragment = text[s[:begin]...s[:end]]
      s[:span_uri] = "<#{text_uri}/spans/#{s[:begin]}-#{s[:end]}>"
      s[:source_uri] = text_uri
      s[:text] = fragment && escape_turtle_string(fragment)
    end

    # add denotation information
    by_uri = spans.each_with_object({}) { |s, index| index[s[:span_uri]] = s }
    denotations.each { |d| (by_uri[d[:span_uri]][:denotations] ||= []) << d }

    spans.sort! { |a, b| (a[:begin] <=> b[:begin]).nonzero? || b[:end] <=> a[:end] }
    spans
  end

  # Fills :children, :parent, :followings and :precedings on +spans+, which
  # must be in the order produced by #collect_spans.
  def index_spans(text, spans)
    len = text.length
    num = spans.length

    # initialize the index
    spans.each do |s|
      s[:followings] = []
      s[:precedings] = []
      s[:children] = []
    end

    (0...num).each do |i|
      current = spans[i]

      # index the embedded spans: candidates start before the current span
      # ends; one already covered by a registered child is skipped.
      j = i + 1
      while j < num && spans[j][:begin] < current[:end]
        unless include_parent?(current[:children], spans[j])
          current[:children] << spans[j]
          spans[j][:parent] = current
        end
        j += 1
      end

      # find the following position: the first non-whitespace offset at or
      # after the end of the current span
      fp = current[:end]
      fp += 1 while fp < len && text[fp].match(/\s/)
      next if fp == len

      # index the following spans
      while j < num && spans[j][:begin] == fp
        current[:followings] << spans[j]
        spans[j][:precedings] << current
        j += 1
      end
    end
  end
end
@@ -0,0 +1,10 @@
1
@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
@prefix tao: <http://pubannotation.org/ontology/tao.owl#> .
<%# one declaration per namespace; the '_base' key becomes the @base directive -%>
<% namespaces.each do |prefix, uri| -%>
<% if prefix == '_base' -%>
@base <%= "<#{uri}>" %> .
<% else -%>
@prefix <%= prefix %>: <%= "<#{uri}>" %> .
<% end -%>
<% end -%>
@@ -0,0 +1,9 @@
1
<%# denotations: each annotation instance is linked to its span and typed with its class -%>
<% denotations.each do |denotation| -%>
<%= denotation[:obj_uri] %> tao:denoted_by <%= denotation[:span_uri] %> ;
rdf:type <%= denotation[:cls_uri] %> .
<% end -%>
<%# relations: one subject-predicate-object statement per relation -%>
<% relations.each do |relation| -%>
<%= relation[:subj_uri] %> <%= relation[:pred_uri] %> <%= relation[:obj_uri] %> .
<% end -%>
@@ -0,0 +1,14 @@
1
<%# spans: one description per span, with its offsets, neighbours and text -%>
<% spans.each do |s| -%>
<%= s[:span_uri] %> rdf:type tao:Text_span ;
tao:belongs_to <%= "<#{s[:source_uri]}>" %> ;
tao:begins_at <%= s[:begin] %> ;
tao:ends_at <%= s[:end] %> ;
<%# inner loops use their own variable names so that the outer span is not shadowed -%>
<% s[:precedings].each do |preceding| -%>
tao:follows <%= preceding[:span_uri] %> ;
<% end -%>
<% s[:children].each do |child| -%>
tao:contains <%= child[:span_uri] %> ;
<% end -%>
<%# NOTE(review): the text is inserted verbatim between the quotes - it must already be safe for a Turtle string -%>
tao:has_text "<%= s[:text] %>" .
<% end -%>
metadata ADDED
@@ -0,0 +1,51 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: tao_rdfizer
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.9.1
5
+ platform: ruby
6
+ authors:
7
+ - Jin-Dong Kim
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2017-09-26 00:00:00.000000000 Z
12
+ dependencies: []
13
+ description: It uses TAO (text annotation ontology) for representation of annotations
14
+ to text.
15
+ email: jindong.kim@gmail.com
16
+ executables:
17
+ - tao_rdfizer
18
+ extensions: []
19
+ extra_rdoc_files: []
20
+ files:
21
+ - bin/tao_rdfizer
22
+ - lib/tao_rdfizer.rb
23
+ - lib/tao_rdfizer/tao_rdfizer.rb
24
+ - view/prefixes_ttl.erb
25
+ - view/tao_annotations_ttl.erb
26
+ - view/tao_spans_ttl.erb
27
+ homepage: https://github.com/pubannotation/tao_rdfizer
28
+ licenses:
29
+ - MIT
30
+ metadata: {}
31
+ post_install_message:
32
+ rdoc_options: []
33
+ require_paths:
34
+ - lib
35
+ required_ruby_version: !ruby/object:Gem::Requirement
36
+ requirements:
37
+ - - ">="
38
+ - !ruby/object:Gem::Version
39
+ version: '0'
40
+ required_rubygems_version: !ruby/object:Gem::Requirement
41
+ requirements:
42
+ - - ">="
43
+ - !ruby/object:Gem::Version
44
+ version: '0'
45
+ requirements: []
46
+ rubyforge_project:
47
+ rubygems_version: 2.4.8
48
+ signing_key:
49
+ specification_version: 4
50
+ summary: A RDF statement generator for annotations in the PubAnnotation JSON format.
51
+ test_files: []