tao_rdfizer 0.9.3 → 0.9.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: aa880e0cce2aae4ec41d6e3c2de0a5cafa8b53a9
4
- data.tar.gz: 91b43e74c2cdd085ed5ba9c50c269987a0ca6516
3
+ metadata.gz: 4614f8d18eaae8dd3861464004f1885935a2e375
4
+ data.tar.gz: bd2ae206163a9b74bb8c3338b75d6c6c296faa9e
5
5
  SHA512:
6
- metadata.gz: c9c83a025384fcb45bc15543183a2bab00b3b2c185280c5c7d3a2e88ef0f2fef2783c21894fea5d0b7464e7dc1d2dff957324ace72d5767f82cb6db8677dbff5
7
- data.tar.gz: 95a19fc8ca13d66cc154b7968f75e9256d69970d0ead660b8271a7156e548903df5217a6d97d6ab8081151d9a83e059753ba4bad430e4458e332a89141ba0e8e
6
+ metadata.gz: 7ba0a1fb7a42873b77e04f6d79b81f3d75f926a0a91f0d737c8694a94cf09098844bf12f30e45298a9c8283942ebffc443660e6eb1d1423547eb58c1c85ec765
7
+ data.tar.gz: a54949e67f7d2fad1ce73cd87f7eacdcddfef732d66273d0933b18dd1cbaa931bf81f1c086dcaa77bc50ef34f9fc8ff10d465a50192774ecea4f06b85e9f2e25
data/bin/tao_rdfizer CHANGED
@@ -2,11 +2,37 @@
2
2
  require 'tao_rdfizer'
3
3
  require 'json'
4
4
 
5
+ mode = nil
6
+
7
+ ## command line option processing
8
+ require 'optparse'
9
+ optparse = OptionParser.new do |opts|
10
+ opts.banner = "Usage: tao_rdfizer [options] an_annotation_file.json"
11
+
12
+ opts.on('-a', '--annotations', 'rdfize only the annotations.') do
13
+ mode = :annotations
14
+ end
15
+
16
+ opts.on('-s', '--spans', 'rdfize only the spans.') do
17
+ mode = :spans
18
+ end
19
+
20
+ opts.on('-h', '--help', 'displays this screen.') do
21
+ puts opts
22
+ exit
23
+ end
24
+ end
25
+
26
+ optparse.parse!
27
+
28
+ unless ARGV.length > 0
29
+ puts optparse.help
30
+ exit
31
+ end
32
+
5
33
  begin
6
34
  annotations = JSON.parse File.read(ARGV[0]), :symbolize_names => true
7
35
  annotations = [annotations] unless annotations.class == Array
8
- mode = :annotations
9
- # mode = :spans
10
36
  rdfizer = TAO::RDFizer.new(mode)
11
37
  puts rdfizer.rdfize(annotations)
12
38
  rescue ArgumentError, IOError => e
@@ -73,14 +73,6 @@ class TAO::RDFizer
73
73
  unless @mode == :annotations
74
74
  # collect spans
75
75
  _spans = _denotations.map{|d| d[:span]}
76
-
77
- # # collect virtual spans
78
- # position = 0
79
- # annotations[:text].scan(/[^\W]*\W/).each do |tok|
80
- # _spans << {:begin => position, :end => position + tok.index(/\W/)}
81
- # position += tok.length
82
- # end
83
-
84
76
  _spans.uniq!
85
77
 
86
78
  # add_infomation
@@ -111,30 +103,21 @@ class TAO::RDFizer
111
103
 
112
104
  # initilaize the index
113
105
  (0 ... num).each do |i|
114
- _spans[i][:followings] = []
115
106
  _spans[i][:precedings] = []
116
- _spans[i][:children] = []
117
107
  end
118
108
 
119
109
  (0 ... num).each do |i|
120
- # index the embedded spans
110
+ # find the first following span
121
111
  j = i + 1
122
- while j < num && _spans[j][:begin] < _spans[i][:end]
123
- unless include_parent?(_spans[i][:children], _spans[j])
124
- _spans[i][:children] << _spans[j]
125
- _spans[j][:parent] = _spans[i]
126
- end
127
- j += 1
128
- end
112
+ j += 1 while j < num && _spans[j][:begin] < _spans[i][:end]
129
113
 
130
- # find the following position
114
+ # find adjacent positions
131
115
  fp = _spans[i][:end]
132
116
  fp += 1 while fp < len && text[fp].match(/\s/)
133
117
  next if fp == len
134
118
 
135
- # index the following spans
119
+ # index adjacent spans
136
120
  while j < num && _spans[j][:begin] == fp
137
- _spans[i][:followings] << _spans[j]
138
121
  _spans[j][:precedings] << _spans[i]
139
122
  j += 1
140
123
  end
@@ -195,16 +178,13 @@ class TAO::RDFizer
195
178
  ERB_SPANS_TTL = <<~HEREDOC
196
179
  <% spans.each do |s| -%>
197
180
  <%= s[:span_uri] %> rdf:type tao:Text_span ;
181
+ tao:has_text "<%= s[:text] %>" .
198
182
  tao:belongs_to <<%= s[:source_uri] %>> ;
199
183
  tao:begins_at <%= s[:begin] %> ;
200
184
  tao:ends_at <%= s[:end] %> ;
201
185
  <% s[:precedings].each do |s| -%>
202
186
  tao:follows <%= s[:span_uri] %> ;
203
187
  <% end -%>
204
- <% s[:children].each do |s| -%>
205
- tao:contains <%= s[:span_uri] %> ;
206
- <% end -%>
207
- tao:has_text "<%= s[:text] %>" .
208
188
  <% end -%>
209
189
  HEREDOC
210
190
 
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: tao_rdfizer
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.9.3
4
+ version: 0.9.4
5
5
  platform: ruby
6
6
  authors:
7
7
  - Jin-Dong Kim
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2017-09-27 00:00:00.000000000 Z
11
+ date: 2017-11-07 00:00:00.000000000 Z
12
12
  dependencies: []
13
13
  description: It uses TAO (text annotation ontology) for representation of annotations
14
14
  to text.