tao_rdfizer 0.9.3 → 0.9.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/bin/tao_rdfizer +28 -2
- data/lib/tao_rdfizer/tao_rdfizer.rb +5 -25
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 4614f8d18eaae8dd3861464004f1885935a2e375
|
4
|
+
data.tar.gz: bd2ae206163a9b74bb8c3338b75d6c6c296faa9e
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 7ba0a1fb7a42873b77e04f6d79b81f3d75f926a0a91f0d737c8694a94cf09098844bf12f30e45298a9c8283942ebffc443660e6eb1d1423547eb58c1c85ec765
|
7
|
+
data.tar.gz: a54949e67f7d2fad1ce73cd87f7eacdcddfef732d66273d0933b18dd1cbaa931bf81f1c086dcaa77bc50ef34f9fc8ff10d465a50192774ecea4f06b85e9f2e25
|
data/bin/tao_rdfizer
CHANGED
@@ -2,11 +2,37 @@
|
|
2
2
|
require 'tao_rdfizer'
|
3
3
|
require 'json'
|
4
4
|
|
5
|
+
mode = nil
|
6
|
+
|
7
|
+
## command line option processing
|
8
|
+
require 'optparse'
|
9
|
+
optparse = OptionParser.new do |opts|
|
10
|
+
opts.banner = "Usage: tao_rdfizer [options] an_annotation_file.json"
|
11
|
+
|
12
|
+
opts.on('-a', '--annotations', 'rdfize only the annotations.') do
|
13
|
+
mode = :annotations
|
14
|
+
end
|
15
|
+
|
16
|
+
opts.on('-s', '--spans', 'rdfize only the spans.') do
|
17
|
+
mode = :spans
|
18
|
+
end
|
19
|
+
|
20
|
+
opts.on('-h', '--help', 'displays this screen.') do
|
21
|
+
puts opts
|
22
|
+
exit
|
23
|
+
end
|
24
|
+
end
|
25
|
+
|
26
|
+
optparse.parse!
|
27
|
+
|
28
|
+
unless ARGV.length > 0
|
29
|
+
puts optparse.help
|
30
|
+
exit
|
31
|
+
end
|
32
|
+
|
5
33
|
begin
|
6
34
|
annotations = JSON.parse File.read(ARGV[0]), :symbolize_names => true
|
7
35
|
annotations = [annotations] unless annotations.class == Array
|
8
|
-
mode = :annotations
|
9
|
-
# mode = :spans
|
10
36
|
rdfizer = TAO::RDFizer.new(mode)
|
11
37
|
puts rdfizer.rdfize(annotations)
|
12
38
|
rescue ArgumentError, IOError => e
|
@@ -73,14 +73,6 @@ class TAO::RDFizer
|
|
73
73
|
unless @mode == :annotations
|
74
74
|
# collect spans
|
75
75
|
_spans = _denotations.map{|d| d[:span]}
|
76
|
-
|
77
|
-
# # collect virtual spans
|
78
|
-
# position = 0
|
79
|
-
# annotations[:text].scan(/[^\W]*\W/).each do |tok|
|
80
|
-
# _spans << {:begin => position, :end => position + tok.index(/\W/)}
|
81
|
-
# position += tok.length
|
82
|
-
# end
|
83
|
-
|
84
76
|
_spans.uniq!
|
85
77
|
|
86
78
|
# add_infomation
|
@@ -111,30 +103,21 @@ class TAO::RDFizer
|
|
111
103
|
|
112
104
|
# initilaize the index
|
113
105
|
(0 ... num).each do |i|
|
114
|
-
_spans[i][:followings] = []
|
115
106
|
_spans[i][:precedings] = []
|
116
|
-
_spans[i][:children] = []
|
117
107
|
end
|
118
108
|
|
119
109
|
(0 ... num).each do |i|
|
120
|
-
#
|
110
|
+
# find the first following span
|
121
111
|
j = i + 1
|
122
|
-
while j < num && _spans[j][:begin] < _spans[i][:end]
|
123
|
-
unless include_parent?(_spans[i][:children], _spans[j])
|
124
|
-
_spans[i][:children] << _spans[j]
|
125
|
-
_spans[j][:parent] = _spans[i]
|
126
|
-
end
|
127
|
-
j += 1
|
128
|
-
end
|
112
|
+
j += 1 while j < num && _spans[j][:begin] < _spans[i][:end]
|
129
113
|
|
130
|
-
# find
|
114
|
+
# find adjacent positions
|
131
115
|
fp = _spans[i][:end]
|
132
116
|
fp += 1 while fp < len && text[fp].match(/\s/)
|
133
117
|
next if fp == len
|
134
118
|
|
135
|
-
# index
|
119
|
+
# index adjacent spans
|
136
120
|
while j < num && _spans[j][:begin] == fp
|
137
|
-
_spans[i][:followings] << _spans[j]
|
138
121
|
_spans[j][:precedings] << _spans[i]
|
139
122
|
j += 1
|
140
123
|
end
|
@@ -195,16 +178,13 @@ class TAO::RDFizer
|
|
195
178
|
ERB_SPANS_TTL = <<~HEREDOC
|
196
179
|
<% spans.each do |s| -%>
|
197
180
|
<%= s[:span_uri] %> rdf:type tao:Text_span ;
|
181
|
+
tao:has_text "<%= s[:text] %>" .
|
198
182
|
tao:belongs_to <<%= s[:source_uri] %>> ;
|
199
183
|
tao:begins_at <%= s[:begin] %> ;
|
200
184
|
tao:ends_at <%= s[:end] %> ;
|
201
185
|
<% s[:precedings].each do |s| -%>
|
202
186
|
tao:follows <%= s[:span_uri] %> ;
|
203
187
|
<% end -%>
|
204
|
-
<% s[:children].each do |s| -%>
|
205
|
-
tao:contains <%= s[:span_uri] %> ;
|
206
|
-
<% end -%>
|
207
|
-
tao:has_text "<%= s[:text] %>" .
|
208
188
|
<% end -%>
|
209
189
|
HEREDOC
|
210
190
|
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: tao_rdfizer
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.9.3
|
4
|
+
version: 0.9.4
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Jin-Dong Kim
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2017-
|
11
|
+
date: 2017-11-07 00:00:00.000000000 Z
|
12
12
|
dependencies: []
|
13
13
|
description: It uses TAO (text annotation ontology) for representation of annotations
|
14
14
|
to text.
|