tao_rdfizer 0.9.3 → 0.9.4
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/bin/tao_rdfizer +28 -2
- data/lib/tao_rdfizer/tao_rdfizer.rb +5 -25
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 4614f8d18eaae8dd3861464004f1885935a2e375
|
4
|
+
data.tar.gz: bd2ae206163a9b74bb8c3338b75d6c6c296faa9e
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 7ba0a1fb7a42873b77e04f6d79b81f3d75f926a0a91f0d737c8694a94cf09098844bf12f30e45298a9c8283942ebffc443660e6eb1d1423547eb58c1c85ec765
|
7
|
+
data.tar.gz: a54949e67f7d2fad1ce73cd87f7eacdcddfef732d66273d0933b18dd1cbaa931bf81f1c086dcaa77bc50ef34f9fc8ff10d465a50192774ecea4f06b85e9f2e25
|
data/bin/tao_rdfizer
CHANGED
@@ -2,11 +2,37 @@
|
|
2
2
|
require 'tao_rdfizer'
|
3
3
|
require 'json'
|
4
4
|
|
5
|
+
mode = nil
|
6
|
+
|
7
|
+
## command line option processing
|
8
|
+
require 'optparse'
|
9
|
+
optparse = OptionParser.new do |opts|
|
10
|
+
opts.banner = "Usage: tao_rdfizer [options] an_annotation_file.json"
|
11
|
+
|
12
|
+
opts.on('-a', '--annotations', 'rdfize only the annotations.') do
|
13
|
+
mode = :annotations
|
14
|
+
end
|
15
|
+
|
16
|
+
opts.on('-s', '--spans', 'rdfize only the spans.') do
|
17
|
+
mode = :spans
|
18
|
+
end
|
19
|
+
|
20
|
+
opts.on('-h', '--help', 'displays this screen.') do
|
21
|
+
puts opts
|
22
|
+
exit
|
23
|
+
end
|
24
|
+
end
|
25
|
+
|
26
|
+
optparse.parse!
|
27
|
+
|
28
|
+
unless ARGV.length > 0
|
29
|
+
puts optparse.help
|
30
|
+
exit
|
31
|
+
end
|
32
|
+
|
5
33
|
begin
|
6
34
|
annotations = JSON.parse File.read(ARGV[0]), :symbolize_names => true
|
7
35
|
annotations = [annotations] unless annotations.class == Array
|
8
|
-
mode = :annotations
|
9
|
-
# mode = :spans
|
10
36
|
rdfizer = TAO::RDFizer.new(mode)
|
11
37
|
puts rdfizer.rdfize(annotations)
|
12
38
|
rescue ArgumentError, IOError => e
|
@@ -73,14 +73,6 @@ class TAO::RDFizer
|
|
73
73
|
unless @mode == :annotations
|
74
74
|
# collect spans
|
75
75
|
_spans = _denotations.map{|d| d[:span]}
|
76
|
-
|
77
|
-
# # collect virtual spans
|
78
|
-
# position = 0
|
79
|
-
# annotations[:text].scan(/[^\W]*\W/).each do |tok|
|
80
|
-
# _spans << {:begin => position, :end => position + tok.index(/\W/)}
|
81
|
-
# position += tok.length
|
82
|
-
# end
|
83
|
-
|
84
76
|
_spans.uniq!
|
85
77
|
|
86
78
|
# add_infomation
|
@@ -111,30 +103,21 @@ class TAO::RDFizer
|
|
111
103
|
|
112
104
|
# initilaize the index
|
113
105
|
(0 ... num).each do |i|
|
114
|
-
_spans[i][:followings] = []
|
115
106
|
_spans[i][:precedings] = []
|
116
|
-
_spans[i][:children] = []
|
117
107
|
end
|
118
108
|
|
119
109
|
(0 ... num).each do |i|
|
120
|
-
#
|
110
|
+
# find the first following span
|
121
111
|
j = i + 1
|
122
|
-
while j < num && _spans[j][:begin] < _spans[i][:end]
|
123
|
-
unless include_parent?(_spans[i][:children], _spans[j])
|
124
|
-
_spans[i][:children] << _spans[j]
|
125
|
-
_spans[j][:parent] = _spans[i]
|
126
|
-
end
|
127
|
-
j += 1
|
128
|
-
end
|
112
|
+
j += 1 while j < num && _spans[j][:begin] < _spans[i][:end]
|
129
113
|
|
130
|
-
# find
|
114
|
+
# find adjacent positions
|
131
115
|
fp = _spans[i][:end]
|
132
116
|
fp += 1 while fp < len && text[fp].match(/\s/)
|
133
117
|
next if fp == len
|
134
118
|
|
135
|
-
# index
|
119
|
+
# index adjacent spans
|
136
120
|
while j < num && _spans[j][:begin] == fp
|
137
|
-
_spans[i][:followings] << _spans[j]
|
138
121
|
_spans[j][:precedings] << _spans[i]
|
139
122
|
j += 1
|
140
123
|
end
|
@@ -195,16 +178,13 @@ class TAO::RDFizer
|
|
195
178
|
ERB_SPANS_TTL = <<~HEREDOC
|
196
179
|
<% spans.each do |s| -%>
|
197
180
|
<%= s[:span_uri] %> rdf:type tao:Text_span ;
|
181
|
+
tao:has_text "<%= s[:text] %>" .
|
198
182
|
tao:belongs_to <<%= s[:source_uri] %>> ;
|
199
183
|
tao:begins_at <%= s[:begin] %> ;
|
200
184
|
tao:ends_at <%= s[:end] %> ;
|
201
185
|
<% s[:precedings].each do |s| -%>
|
202
186
|
tao:follows <%= s[:span_uri] %> ;
|
203
187
|
<% end -%>
|
204
|
-
<% s[:children].each do |s| -%>
|
205
|
-
tao:contains <%= s[:span_uri] %> ;
|
206
|
-
<% end -%>
|
207
|
-
tao:has_text "<%= s[:text] %>" .
|
208
188
|
<% end -%>
|
209
189
|
HEREDOC
|
210
190
|
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: tao_rdfizer
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.9.3
|
4
|
+
version: 0.9.4
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Jin-Dong Kim
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2017-
|
11
|
+
date: 2017-11-07 00:00:00.000000000 Z
|
12
12
|
dependencies: []
|
13
13
|
description: It uses TAO (text annotation ontology) for representation of annotations
|
14
14
|
to text.
|