tao_rdfizer 0.9.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/bin/tao_rdfizer +14 -0
- data/lib/tao_rdfizer.rb +1 -0
- data/lib/tao_rdfizer/tao_rdfizer.rb +183 -0
- data/view/prefixes_ttl.erb +10 -0
- data/view/tao_annotations_ttl.erb +9 -0
- data/view/tao_spans_ttl.erb +14 -0
- metadata +51 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 5e3d5c4f2fea0db166fcd917ed89667233b10cd3
|
4
|
+
data.tar.gz: 0216a19ec23514d036319e1f9125694d50dfb0e9
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 028c6d291dfd986350c641f5265b014231876fa12d7b828df2b468a8fd1b995798ab2fedc2a493482c27aea420293bd8a8c47aa0521cdbd871f539d0483341a0
|
7
|
+
data.tar.gz: 3bd1f003d591cb8c849db4751ac168176020b395bdd3e4a3712faf83e204020ab1762e74391bfa1c9f0ed2ae6b590a202284ee972d47c52bf133a7a9ff645b0a
|
data/bin/tao_rdfizer
ADDED
@@ -0,0 +1,14 @@
|
|
1
|
+
#!/usr/bin/env ruby
# Command-line entry point: reads a PubAnnotation JSON file and prints the
# corresponding TAO span descriptions (Turtle) to standard output.
#
# Usage: tao_rdfizer ANNOTATIONS_JSON_FILE
require 'tao_rdfizer'
require 'json'

# Without a file name File.read(nil) would raise an unhandled TypeError.
if ARGV.empty?
  warn "Usage: #{File.basename($PROGRAM_NAME)} ANNOTATIONS_JSON_FILE"
  exit 1
end

begin
  annotations = JSON.parse(File.read(ARGV[0]), :symbolize_names => true)
  # A single annotation object is treated as a collection of one.
  annotations = [annotations] unless annotations.is_a?(Array)
  # mode = :annotations
  mode = :spans
  rdfizer = TAO::RDFizer.new(mode)
  puts rdfizer.rdfize(annotations)
rescue ArgumentError, IOError, SystemCallError, JSON::ParserError => e
  # SystemCallError covers Errno::ENOENT and friends (unreadable input file);
  # JSON::ParserError covers malformed input.
  # Diagnostics go to stderr so that they never mix with Turtle on stdout,
  # and the exit status tells the caller that the conversion failed.
  warn e.message
  exit 1
end
|
data/lib/tao_rdfizer.rb
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
require 'tao_rdfizer/tao_rdfizer'
|
data/lib/tao_rdfizer/tao_rdfizer.rb
ADDED
@@ -0,0 +1,183 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
require 'erb'
|
3
|
+
|
4
|
+
module TAO; end unless defined? TAO
|
5
|
+
|
6
|
+
class TAO::RDFizer
|
7
|
+
# if mode == :spans then produces span descriptions
|
8
|
+
# if mode == :annotations then produces annotation descriptions
|
9
|
+
# if mode == nil then produces both
|
10
|
+
def initialize(mode = nil)
|
11
|
+
@mode = mode
|
12
|
+
template_filename = unless mode.nil?
|
13
|
+
if mode == :annotations
|
14
|
+
'view/tao_annotations_ttl.erb'
|
15
|
+
elsif mode == :spans
|
16
|
+
'view/tao_spans_ttl.erb'
|
17
|
+
else
|
18
|
+
'view/tao_ttl.erb'
|
19
|
+
end
|
20
|
+
else
|
21
|
+
'view/tao_ttl.erb'
|
22
|
+
end
|
23
|
+
@tao_ttl_erb = ERB.new(File.read(template_filename), nil, '-')
|
24
|
+
@prefix_ttl_erb = ERB.new(File.read("view/prefixes_ttl.erb"), nil, '-')
|
25
|
+
end
|
26
|
+
|
27
|
+
def rdfize(annotations_col)
|
28
|
+
# namespaces
|
29
|
+
namespaces = {}
|
30
|
+
anns = annotations_col.first
|
31
|
+
anns[:namespaces].each {|n| namespaces[n[:prefix]] = n[:uri]} unless anns[:namespaces].nil?
|
32
|
+
raise ArgumentError, "'prj' is a reserved prefix." if namespaces.has_key?('prj')
|
33
|
+
|
34
|
+
unless @mode ==:spans
|
35
|
+
project_uri = 'http://pubannotation.org/projects/' + anns[:project] unless @mode ==:spans
|
36
|
+
namespaces['prj'] = project_uri + '/'
|
37
|
+
end
|
38
|
+
|
39
|
+
denotations = []
|
40
|
+
relations = []
|
41
|
+
spans = []
|
42
|
+
|
43
|
+
annotations_col.each do |annotations|
|
44
|
+
text = annotations[:text]
|
45
|
+
text_uri = annotations[:target]
|
46
|
+
text_id = begin
|
47
|
+
sourcedb, sourceid, divid = get_target_info(text_uri)
|
48
|
+
divid.nil? ? "#{sourcedb}-#{sourceid}" : "#{sourcedb}-#{sourceid}-#{divid}"
|
49
|
+
end
|
50
|
+
|
51
|
+
# denotations and relations
|
52
|
+
_denotations = annotations[:denotations]
|
53
|
+
_relations = annotations[:relations]
|
54
|
+
_denotations = [] if _denotations.nil?
|
55
|
+
_relations = [] if _relations.nil?
|
56
|
+
if @mode == :spans && annotations.has_key?(:tracks)
|
57
|
+
annotations[:tracks].each do |track|
|
58
|
+
_denotations += track[:denotations]
|
59
|
+
_relations += track[:relations]
|
60
|
+
end
|
61
|
+
end
|
62
|
+
|
63
|
+
# denotations preprocessing
|
64
|
+
_denotations.each do |d|
|
65
|
+
span_uri = "<#{text_uri}/spans/#{d[:span][:begin]}-#{d[:span][:end]}>"
|
66
|
+
d[:span_uri] = span_uri
|
67
|
+
d[:obj_uri] = "prj:#{text_id}-#{d[:id]}"
|
68
|
+
d[:cls_uri] = find_uri(d[:obj], namespaces)
|
69
|
+
end
|
70
|
+
|
71
|
+
# relations preprocessing
|
72
|
+
_relations.each do |r|
|
73
|
+
r[:subj_uri] = "prj:#{text_id}-#{r[:subj]}"
|
74
|
+
r[:obj_uri] = "prj:#{text_id}-#{r[:obj]}"
|
75
|
+
r[:pred_uri] = find_uri(r[:pred], namespaces)
|
76
|
+
end
|
77
|
+
|
78
|
+
unless @mode == :annotations
|
79
|
+
# collect spans
|
80
|
+
_spans = _denotations.map{|d| d[:span]}
|
81
|
+
position = 0
|
82
|
+
annotations[:text].scan(/[^\W]*\W/).each do |tok|
|
83
|
+
_spans << {:begin => position, :end => position + tok.index(/\W/)}
|
84
|
+
position += tok.length
|
85
|
+
end
|
86
|
+
_spans.uniq!
|
87
|
+
|
88
|
+
# add_infomation
|
89
|
+
_spans.each do |s|
|
90
|
+
s[:span_uri] = "<#{text_uri}/spans/#{s[:begin]}-#{s[:end]}>"
|
91
|
+
s[:source_uri] = text_uri
|
92
|
+
s[:text] = text[s[:begin] ... s[:end]]
|
93
|
+
end
|
94
|
+
|
95
|
+
# index
|
96
|
+
spanh = _spans.inject({}){|r, s| r[s[:span_uri]] = s; r}
|
97
|
+
|
98
|
+
# add denotation information
|
99
|
+
_denotations.each do |d|
|
100
|
+
span_uri = d[:span_uri]
|
101
|
+
if spanh[span_uri][:denotations].nil?
|
102
|
+
spanh[span_uri][:denotations] = [d]
|
103
|
+
else
|
104
|
+
spanh[span_uri][:denotations] << d
|
105
|
+
end
|
106
|
+
end
|
107
|
+
|
108
|
+
_spans.sort!{|a, b| (a[:begin] <=> b[:begin]).nonzero? || b[:end] <=> a[:end]}
|
109
|
+
|
110
|
+
## begin indexing
|
111
|
+
len = text.length
|
112
|
+
num = _spans.length
|
113
|
+
|
114
|
+
# initilaize the index
|
115
|
+
(0 ... num).each do |i|
|
116
|
+
_spans[i][:followings] = []
|
117
|
+
_spans[i][:precedings] = []
|
118
|
+
_spans[i][:children] = []
|
119
|
+
end
|
120
|
+
|
121
|
+
(0 ... num).each do |i|
|
122
|
+
# index the embedded spans
|
123
|
+
j = i + 1
|
124
|
+
while j < num && _spans[j][:begin] < _spans[i][:end]
|
125
|
+
unless include_parent?(_spans[i][:children], _spans[j])
|
126
|
+
_spans[i][:children] << _spans[j]
|
127
|
+
_spans[j][:parent] = _spans[i]
|
128
|
+
end
|
129
|
+
j += 1
|
130
|
+
end
|
131
|
+
|
132
|
+
# find the following position
|
133
|
+
fp = _spans[i][:end]
|
134
|
+
fp += 1 while fp < len && text[fp].match(/\s/)
|
135
|
+
next if fp == len
|
136
|
+
|
137
|
+
# index the following spans
|
138
|
+
while j < num && _spans[j][:begin] == fp
|
139
|
+
_spans[i][:followings] << _spans[j]
|
140
|
+
_spans[j][:precedings] << _spans[i]
|
141
|
+
j += 1
|
142
|
+
end
|
143
|
+
end
|
144
|
+
end
|
145
|
+
|
146
|
+
denotations += _denotations
|
147
|
+
relations += _relations
|
148
|
+
spans += _spans unless @mode == :annotations
|
149
|
+
end
|
150
|
+
|
151
|
+
ttl = @prefix_ttl_erb.result(binding) + @tao_ttl_erb.result(binding)
|
152
|
+
end
|
153
|
+
|
154
|
+
def include_parent?(spans, span)
|
155
|
+
# spans.each{|s| return true if (s[:begin] <= span[:begin] && s[:end] > span[:end]) || (s[:begin] < span[:begin] && s[:end] >= span[:end])}
|
156
|
+
spans.each{|s| return true if s[:begin] <= span[:begin] && s[:end] >= span[:end]}
|
157
|
+
return false
|
158
|
+
end
|
159
|
+
|
160
|
+
def get_target_info (text_uri)
|
161
|
+
sourcedb = (text_uri =~ %r|/sourcedb/([^/]+)|)? $1 : nil
|
162
|
+
sourceid = (text_uri =~ %r|/sourceid/([^/]+)|)? $1 : nil
|
163
|
+
divid = (text_uri =~ %r|/divs/([^/]+)|)? $1 : nil
|
164
|
+
|
165
|
+
return sourcedb, sourceid, divid
|
166
|
+
end
|
167
|
+
|
168
|
+
def find_uri (label, namespaces)
|
169
|
+
delimiter_position = label.index(':')
|
170
|
+
if !delimiter_position.nil? && namespaces.keys.include?(label[0...delimiter_position])
|
171
|
+
label
|
172
|
+
elsif label =~ %r[^https?://]
|
173
|
+
"<#{label}>"
|
174
|
+
else
|
175
|
+
clabel = if label.match(/^\W+$/)
|
176
|
+
'SYM'
|
177
|
+
else
|
178
|
+
label.sub(/^\W+/, '').sub(/\W+$/, '')
|
179
|
+
end
|
180
|
+
namespaces.has_key?('_base') ? "<#{clabel}>" : "prj:#{clabel}"
|
181
|
+
end
|
182
|
+
end
|
183
|
+
end
|
data/view/prefixes_ttl.erb
ADDED
@@ -0,0 +1,10 @@
|
|
1
|
+
@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
@prefix tao: <http://pubannotation.org/ontology/tao.owl#> .
<%# one declaration per namespace; the '_base' entry is emitted as @base -%>
<% namespaces.each do |prefix, uri| -%>
<% if prefix == '_base' -%>
@base <%= "<#{uri}>" %> .
<% else -%>
@prefix <%= prefix %>: <%= "<#{uri}>" %> .
<% end -%>
<% end -%>
|
data/view/tao_annotations_ttl.erb
ADDED
@@ -0,0 +1,9 @@
|
|
1
|
+
<%# denotations: each annotated object with its span and its class -%>
<% denotations.each do |denotation| -%>
<%= denotation[:obj_uri] %> tao:denoted_by <%= denotation[:span_uri] %> ;
rdf:type <%= denotation[:cls_uri] %> .
<% end -%>
<%# relations: one subject-predicate-object statement each -%>
<% relations.each do |relation| -%>
<%= relation[:subj_uri] %> <%= relation[:pred_uri] %> <%= relation[:obj_uri] %> .
<% end -%>
|
data/view/tao_spans_ttl.erb
ADDED
@@ -0,0 +1,14 @@
|
|
1
|
+
<%# spans: one tao:Text_span description per span -%>
<% spans.each do |s| -%>
<%= s[:span_uri] %> rdf:type tao:Text_span ;
tao:belongs_to <%= "<#{s[:source_uri]}>" %> ;
tao:begins_at <%= s[:begin] %> ;
tao:ends_at <%= s[:end] %> ;
<% s[:precedings].each do |preceding| -%>
tao:follows <%= preceding[:span_uri] %> ;
<% end -%>
<% s[:children].each do |child| -%>
tao:contains <%= child[:span_uri] %> ;
<% end -%>
<%# escape backslash, double quote, line breaks and tab so that the text remains a valid Turtle string literal -%>
tao:has_text "<%= s[:text].to_s.gsub(/[\\"\n\r\t]/, '\\' => '\\\\', '"' => '\\"', "\n" => '\\n', "\r" => '\\r', "\t" => '\\t') %>" .
<% end -%>
|
metadata
ADDED
@@ -0,0 +1,51 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: tao_rdfizer
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.9.1
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Jin-Dong Kim
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2017-09-26 00:00:00.000000000 Z
|
12
|
+
dependencies: []
|
13
|
+
description: It uses TAO (text annotation ontology) for representation of annotations
|
14
|
+
to text.
|
15
|
+
email: jindong.kim@gmail.com
|
16
|
+
executables:
|
17
|
+
- tao_rdfizer
|
18
|
+
extensions: []
|
19
|
+
extra_rdoc_files: []
|
20
|
+
files:
|
21
|
+
- bin/tao_rdfizer
|
22
|
+
- lib/tao_rdfizer.rb
|
23
|
+
- lib/tao_rdfizer/tao_rdfizer.rb
|
24
|
+
- view/prefixes_ttl.erb
|
25
|
+
- view/tao_annotations_ttl.erb
|
26
|
+
- view/tao_spans_ttl.erb
|
27
|
+
homepage: https://github.com/pubannotation/tao_rdfizer
|
28
|
+
licenses:
|
29
|
+
- MIT
|
30
|
+
metadata: {}
|
31
|
+
post_install_message:
|
32
|
+
rdoc_options: []
|
33
|
+
require_paths:
|
34
|
+
- lib
|
35
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
36
|
+
requirements:
|
37
|
+
- - ">="
|
38
|
+
- !ruby/object:Gem::Version
|
39
|
+
version: '0'
|
40
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
41
|
+
requirements:
|
42
|
+
- - ">="
|
43
|
+
- !ruby/object:Gem::Version
|
44
|
+
version: '0'
|
45
|
+
requirements: []
|
46
|
+
rubyforge_project:
|
47
|
+
rubygems_version: 2.4.8
|
48
|
+
signing_key:
|
49
|
+
specification_version: 4
|
50
|
+
summary: A RDF statement generator for annotations in the PubAnnotation JSON format.
|
51
|
+
test_files: []
|