tao_rdfizer 0.9.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/bin/tao_rdfizer +14 -0
- data/lib/tao_rdfizer.rb +1 -0
- data/lib/tao_rdfizer/tao_rdfizer.rb +183 -0
- data/view/prefixes_ttl.erb +10 -0
- data/view/tao_annotations_ttl.erb +9 -0
- data/view/tao_spans_ttl.erb +14 -0
- metadata +51 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 5e3d5c4f2fea0db166fcd917ed89667233b10cd3
|
4
|
+
data.tar.gz: 0216a19ec23514d036319e1f9125694d50dfb0e9
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 028c6d291dfd986350c641f5265b014231876fa12d7b828df2b468a8fd1b995798ab2fedc2a493482c27aea420293bd8a8c47aa0521cdbd871f539d0483341a0
|
7
|
+
data.tar.gz: 3bd1f003d591cb8c849db4751ac168176020b395bdd3e4a3712faf83e204020ab1762e74391bfa1c9f0ed2ae6b590a202284ee972d47c52bf133a7a9ff645b0a
|
data/bin/tao_rdfizer
ADDED
@@ -0,0 +1,14 @@
|
|
1
|
+
#!/usr/bin/env ruby
# Command-line entry point: reads a PubAnnotation JSON file (a single
# annotation object or an array of them) and prints its RDF (Turtle)
# rendering to standard output.
#
# Usage: tao_rdfizer annotations.json
#
# Problems are reported on standard error and the process exits with status 1.
require 'tao_rdfizer'
require 'json'

if ARGV.empty?
  warn "Usage: #{File.basename($PROGRAM_NAME)} annotations.json"
  exit 1
end

begin
  annotations = JSON.parse File.read(ARGV[0]), :symbolize_names => true
  # A single annotation object is handled as a one-element collection.
  annotations = [annotations] unless annotations.is_a?(Array)
  # mode = :annotations
  mode = :spans
  rdfizer = TAO::RDFizer.new(mode)
  puts rdfizer.rdfize(annotations)
rescue ArgumentError, IOError, SystemCallError, JSON::ParserError => e
  # SystemCallError covers the Errno::* family (e.g. a missing or unreadable
  # input file); JSON::ParserError covers malformed input.
  warn e.message
  exit 1
end
|
data/lib/tao_rdfizer.rb
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
require 'tao_rdfizer/tao_rdfizer'
|
@@ -0,0 +1,183 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
require 'erb'
|
3
|
+
|
4
|
+
module TAO; end unless defined? TAO

# Generates RDF statements, serialized as Turtle through ERB templates, from
# annotations in the PubAnnotation JSON format (parsed with symbolized keys).
class TAO::RDFizer
  # Template directory of the gem, resolved relative to this file so that the
  # templates are found regardless of the current working directory.
  VIEW_DIR = File.expand_path('../../view', File.dirname(__FILE__))

  # Body template used for each explicitly supported mode.
  TEMPLATE_FILES = {
    :annotations => 'tao_annotations_ttl.erb',
    :spans => 'tao_spans_ttl.erb'
  }.freeze

  # Body template used for any other mode (including nil).
  # NOTE(review): this file is not among the files listed in the gem
  # metadata, so such a mode only works when the template is supplied
  # separately (e.g. as ./view/tao_ttl.erb) - confirm.
  DEFAULT_TEMPLATE_FILE = 'tao_ttl.erb'

  # Template producing the @prefix / @base declarations.
  PREFIX_TEMPLATE_FILE = 'prefixes_ttl.erb'

  # Base of the URI that the reserved 'prj' prefix is bound to.
  PROJECT_URI_BASE = 'http://pubannotation.org/projects/'

  # if mode == :spans then produces span descriptions
  # if mode == :annotations then produces annotation descriptions
  # if mode == nil then produces both
  #
  # Raises a SystemCallError (e.g. Errno::ENOENT) when a template file cannot
  # be read.
  def initialize(mode = nil)
    @mode = mode
    @tao_ttl_erb = load_template(TEMPLATE_FILES.fetch(mode, DEFAULT_TEMPLATE_FILE))
    @prefix_ttl_erb = load_template(PREFIX_TEMPLATE_FILE)
  end

  # Renders a collection of annotation sets as one Turtle document.
  #
  # annotations_col - non-empty Array of Hashes with Symbol keys. The keys
  #                   read are :text, :target, :project, :namespaces,
  #                   :denotations, :relations and (in :spans mode) :tracks.
  #                   Namespaces and the project are taken from the first
  #                   element only. :text must be a String whenever spans
  #                   are described (any mode but :annotations).
  #
  # The denotation, relation and span hashes of the input are modified in
  # place: URIs and index information are stored in them for the templates.
  #
  # Returns the output of the prefix template followed by the body template.
  # Raises ArgumentError when the collection is empty, when 'prj' is declared
  # as a namespace prefix, or when :project is missing in a mode that needs it.
  def rdfize(annotations_col)
    if annotations_col.nil? || annotations_col.empty?
      raise ArgumentError, 'At least one set of annotations is required.'
    end

    # The local variable names below are part of the template contract: the
    # ERB templates read them through the binding of this method.

    # namespaces
    namespaces = {}
    anns = annotations_col.first
    (anns[:namespaces] || []).each { |n| namespaces[n[:prefix]] = n[:uri] }
    raise ArgumentError, "'prj' is a reserved prefix." if namespaces.has_key?('prj')

    unless @mode == :spans
      raise ArgumentError, "'project' is required unless the mode is :spans." if anns[:project].nil?

      project_uri = PROJECT_URI_BASE + anns[:project]
      namespaces['prj'] = project_uri + '/'
    end

    denotations = []
    relations = []
    spans = []

    annotations_col.each do |annotations|
      text_uri = annotations[:target]
      sourcedb, sourceid, divid = get_target_info(text_uri)
      text_id = divid.nil? ? "#{sourcedb}-#{sourceid}" : "#{sourcedb}-#{sourceid}-#{divid}"

      _denotations, _relations = collect_annotations(annotations)

      # denotations preprocessing
      _denotations.each do |d|
        d[:span_uri] = "<#{text_uri}/spans/#{d[:span][:begin]}-#{d[:span][:end]}>"
        d[:obj_uri] = "prj:#{text_id}-#{d[:id]}"
        d[:cls_uri] = find_uri(d[:obj], namespaces)
      end

      # relations preprocessing
      _relations.each do |r|
        r[:subj_uri] = "prj:#{text_id}-#{r[:subj]}"
        r[:obj_uri] = "prj:#{text_id}-#{r[:obj]}"
        r[:pred_uri] = find_uri(r[:pred], namespaces)
      end

      denotations += _denotations
      relations += _relations
      spans += build_spans(_denotations, annotations[:text], text_uri) unless @mode == :annotations
    end

    @prefix_ttl_erb.result(binding) + @tao_ttl_erb.result(binding)
  end

  # Tells whether any span in +spans+ covers +span+ entirely, i.e. begins at
  # or before its begin and ends at or after its end.
  def include_parent?(spans, span)
    spans.any? { |s| s[:begin] <= span[:begin] && s[:end] >= span[:end] }
  end

  # Extracts the path segments that follow '/sourcedb/', '/sourceid/' and
  # '/divs/' in +text_uri+.
  #
  # Returns [sourcedb, sourceid, divid]; a missing part is nil.
  def get_target_info(text_uri)
    uri = text_uri.to_s
    sourcedb = uri[%r{/sourcedb/([^/]+)}, 1]
    sourceid = uri[%r{/sourceid/([^/]+)}, 1]
    divid = uri[%r{/divs/([^/]+)}, 1]

    [sourcedb, sourceid, divid]
  end

  # Turns an annotation label into a Turtle term.
  #
  # - A label whose part before the first ':' is a declared prefix is
  #   returned unchanged.
  # - A label starting with http:// or https:// is wrapped in angle brackets.
  # - Any other label is stripped of leading and trailing non-word
  #   characters ('SYM' is used when it consists of non-word characters
  #   only) and resolved against '_base' when that namespace is declared,
  #   otherwise against the 'prj' prefix.
  #
  # The patterns are anchored to the whole label (\A, \z), so a line break
  # inside a label cannot make them match in the middle of it.
  def find_uri(label, namespaces)
    prefix, separator, _local = label.partition(':')
    return label if !separator.empty? && namespaces.key?(prefix)
    return "<#{label}>" if label =~ %r{\Ahttps?://}

    clabel = if label =~ /\A\W+\z/
               'SYM'
             else
               label.sub(/\A\W+/, '').sub(/\W+\z/, '')
             end
    namespaces.key?('_base') ? "<#{clabel}>" : "prj:#{clabel}"
  end

  private

  # Reads an ERB template and compiles it with the '-' trim mode.
  #
  # The template shipped with the gem is preferred; the path relative to the
  # current directory ("view/<filename>") is kept as a fallback.
  def load_template(filename)
    bundled = File.join(VIEW_DIR, filename)
    path = File.exist?(bundled) ? bundled : File.join('view', filename)
    source = File.read(path)

    # Positional safe_level / trim_mode arguments are deprecated since
    # Ruby 2.6; use the keyword form wherever ERB offers it.
    if ERB.instance_method(:initialize).parameters.include?([:key, :trim_mode])
      ERB.new(source, :trim_mode => '-')
    else
      ERB.new(source, nil, '-')
    end
  end

  # Gathers the denotations and relations of one annotation set. In :spans
  # mode those of every track are included as well. Missing lists count as
  # empty.
  #
  # Returns [denotations, relations] as new arrays.
  def collect_annotations(annotations)
    dens = annotations[:denotations] || []
    rels = annotations[:relations] || []

    if @mode == :spans
      (annotations[:tracks] || []).each do |track|
        dens += track[:denotations] || []
        rels += track[:relations] || []
      end
    end

    [dens, rels]
  end

  # Builds the span descriptions of one text: the spans of the given
  # denotations plus one span for every run of word characters in the text.
  #
  # Returns the spans sorted by begin (ascending) and end (descending), each
  # with :span_uri, :source_uri, :text, :children, :followings, :precedings
  # and, where applicable, :denotations and :parent.
  def build_spans(denotations, text, text_uri)
    found = denotations.map { |d| d[:span] }

    # Word tokens. Every word is included, the last one too when the text
    # does not end with a non-word character, and no zero-length spans are
    # produced for punctuation or whitespace.
    text.scan(/\w+/) do
      token = Regexp.last_match
      found << { :begin => token.begin(0), :end => token.end(0) }
    end

    # One description per position; denotation spans come first and win.
    found = found.uniq { |s| [s[:begin], s[:end]] }

    # add information
    found.each do |s|
      s[:span_uri] = "<#{text_uri}/spans/#{s[:begin]}-#{s[:end]}>"
      s[:source_uri] = text_uri
      s[:text] = text[s[:begin]...s[:end]]
    end

    # index
    by_uri = {}
    found.each { |s| by_uri[s[:span_uri]] = s }

    # add denotation information
    denotations.each { |d| (by_uri[d[:span_uri]][:denotations] ||= []) << d }

    found = found.sort_by { |s| [s[:begin], -s[:end]] }
    index_spans(found, text)
    found
  end

  # Fills :children, :followings and :precedings (and :parent on children)
  # of +spans+, which must be sorted as described for build_spans.
  def index_spans(spans, text)
    len = text.length
    num = spans.length

    # initialize the index
    spans.each do |s|
      s[:followings] = []
      s[:precedings] = []
      s[:children] = []
    end

    spans.each_with_index do |span, i|
      # index the embedded spans: those beginning before this span ends and
      # not already covered by one of the children collected so far
      j = i + 1
      while j < num && spans[j][:begin] < span[:end]
        unless include_parent?(span[:children], spans[j])
          span[:children] << spans[j]
          spans[j][:parent] = span
        end
        j += 1
      end

      # find the following position: the first non-whitespace character
      # after the span
      fp = span[:end]
      fp += 1 while fp < len && text[fp] =~ /\s/
      next if fp >= len

      # spans beginning in the skipped whitespace do not follow directly;
      # step over them so that they cannot block the scan
      j += 1 while j < num && spans[j][:begin] < fp

      # index the following spans
      while j < num && spans[j][:begin] == fp
        span[:followings] << spans[j]
        spans[j][:precedings] << span
        j += 1
      end
    end
  end
end
|
@@ -0,0 +1,10 @@
|
|
1
|
+
@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
@prefix tao: <http://pubannotation.org/ontology/tao.owl#> .
<%# One declaration per namespace; the '_base' entry is written as the @base URI. -%>
<% namespaces.each do |prefix, uri| -%>
<% if prefix == '_base' -%>
@base <%= "<#{uri}>" %> .
<% else -%>
@prefix <%= prefix %>: <%= "<#{uri}>" %> .
<% end -%>
<% end -%>
|
@@ -0,0 +1,9 @@
|
|
1
|
+
<%# Denotations: each annotated object is linked to its span and typed with its class. -%>
<% denotations.each do |denotation| -%>
<%= denotation[:obj_uri] %> tao:denoted_by <%= denotation[:span_uri] %> ;
rdf:type <%= denotation[:cls_uri] %> .
<% end -%>
<%# Relations: one subject-predicate-object statement each. -%>
<% relations.each do |relation| -%>
<%= relation[:subj_uri] %> <%= relation[:pred_uri] %> <%= relation[:obj_uri] %> .
<% end -%>
|
@@ -0,0 +1,14 @@
|
|
1
|
+
<%# Spans: position, preceding spans, contained spans and covered text of each span. -%>
<%# Characters that may not appear unescaped in a double-quoted Turtle string literal. -%>
<% turtle_escapes = { "\\" => "\\\\", "\"" => "\\\"", "\n" => "\\n", "\r" => "\\r" } -%>
<% spans.each do |s| -%>
<%= s[:span_uri] %> rdf:type tao:Text_span ;
tao:belongs_to <%= "<#{s[:source_uri]}>" %> ;
tao:begins_at <%= s[:begin] %> ;
tao:ends_at <%= s[:end] %> ;
<% s[:precedings].each do |preceding| -%>
tao:follows <%= preceding[:span_uri] %> ;
<% end -%>
<% s[:children].each do |child| -%>
tao:contains <%= child[:span_uri] %> ;
<% end -%>
tao:has_text "<%= s[:text].to_s.gsub(/[\\"\n\r]/, turtle_escapes) %>" .
<% end -%>
|
metadata
ADDED
@@ -0,0 +1,51 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: tao_rdfizer
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.9.1
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Jin-Dong Kim
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2017-09-26 00:00:00.000000000 Z
|
12
|
+
dependencies: []
|
13
|
+
description: It uses TAO (text annotation ontology) for representation of annotations
|
14
|
+
to text.
|
15
|
+
email: jindong.kim@gmail.com
|
16
|
+
executables:
|
17
|
+
- tao_rdfizer
|
18
|
+
extensions: []
|
19
|
+
extra_rdoc_files: []
|
20
|
+
files:
|
21
|
+
- bin/tao_rdfizer
|
22
|
+
- lib/tao_rdfizer.rb
|
23
|
+
- lib/tao_rdfizer/tao_rdfizer.rb
|
24
|
+
- view/prefixes_ttl.erb
|
25
|
+
- view/tao_annotations_ttl.erb
|
26
|
+
- view/tao_spans_ttl.erb
|
27
|
+
homepage: https://github.com/pubannotation/tao_rdfizer
|
28
|
+
licenses:
|
29
|
+
- MIT
|
30
|
+
metadata: {}
|
31
|
+
post_install_message:
|
32
|
+
rdoc_options: []
|
33
|
+
require_paths:
|
34
|
+
- lib
|
35
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
36
|
+
requirements:
|
37
|
+
- - ">="
|
38
|
+
- !ruby/object:Gem::Version
|
39
|
+
version: '0'
|
40
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
41
|
+
requirements:
|
42
|
+
- - ">="
|
43
|
+
- !ruby/object:Gem::Version
|
44
|
+
version: '0'
|
45
|
+
requirements: []
|
46
|
+
rubyforge_project:
|
47
|
+
rubygems_version: 2.4.8
|
48
|
+
signing_key:
|
49
|
+
specification_version: 4
|
50
|
+
summary: A RDF statement generator for annotations in the PubAnnotation JSON format.
|
51
|
+
test_files: []
|