tcf2nif 0.2.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +37 -0
- data/.gitlab-ci.yml +25 -0
- data/.rspec +2 -0
- data/.travis.yml +3 -0
- data/CODE_OF_CONDUCT.md +13 -0
- data/Gemfile +6 -0
- data/LICENSE.txt +165 -0
- data/README.md +112 -0
- data/Rakefile +6 -0
- data/Tcf2Nif.ipynb +197 -0
- data/bin/console +14 -0
- data/bin/setup +7 -0
- data/exe/convpar +7 -0
- data/exe/createturtle +32 -0
- data/exe/tcf2nif +29 -0
- data/exe/txt2tcf +30 -0
- data/lib/tcf2nif.rb +55 -0
- data/lib/tcf2nif/annotation.rb +39 -0
- data/lib/tcf2nif/bounded_element.rb +43 -0
- data/lib/tcf2nif/geo_annotation.rb +26 -0
- data/lib/tcf2nif/named_entity_annotation.rb +28 -0
- data/lib/tcf2nif/tcf_document.rb +228 -0
- data/lib/tcf2nif/token.rb +47 -0
- data/lib/tcf2nif/transformer.rb +352 -0
- data/lib/tcf2nif/version.rb +3 -0
- data/tcf2nif.gemspec +40 -0
- metadata +200 -0
@@ -0,0 +1,47 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
# This file is part of the tcf2nif gem.
|
3
|
+
# Copyright (c) 2015 Peter Menke, SFB 673, Universität Bielefeld
|
4
|
+
# http://www.sfb673.org
|
5
|
+
#
|
6
|
+
# tcf2nif is free software: you can redistribute it and/or modify
|
7
|
+
# it under the terms of the GNU Lesser General Public License as
|
8
|
+
# published by the Free Software Foundation, either version 3 of
|
9
|
+
# the License, or (at your option) any later version.
|
10
|
+
#
|
11
|
+
# tcf2nif is distributed in the hope that it will be useful,
|
12
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
13
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
14
|
+
# GNU Lesser General Public License for more details.
|
15
|
+
#
|
16
|
+
# You should have received a copy of the GNU Lesser General Public
|
17
|
+
# License along with tcf2nif. If not, see
|
18
|
+
# <http://www.gnu.org/licenses/>.
|
19
|
+
|
20
|
+
module Tcf2Nif

  # A single token of a TCF document.
  #
  # Wraps the XML element the token was read from and carries two optional
  # annotations, +pos+ and +lemma+, which start out as nil and are assigned
  # from outside through the accessors.
  #
  # NOTE(review): character offsets (begin_index / end_index) are presumably
  # provided by the BoundedElement superclass, which is not visible here.
  class Token < BoundedElement

    # Part-of-speech tag and lemma; nil until an annotation is assigned.
    attr_accessor :pos, :lemma

    # tcf_document:: the document object this token belongs to
    # xml_element::  the XML element of the token; only +text+ is called on it
    #
    # NOTE(review): super is not called here, so any setup done by
    # BoundedElement#initialize is skipped -- confirm this is intended.
    def initialize(tcf_document, xml_element)
      @tcf_document = tcf_document
      @xml_element = xml_element
      @pos = nil
      @lemma = nil
    end

    # The surface form of the token, i.e. the text of its XML element.
    # The value is read once and then memoized.
    def form
      @form ||= @xml_element.text
    end

    # True if a part-of-speech tag has been assigned.
    def pos?
      !pos.nil?
    end

    # True if a lemma has been assigned.
    def lemma?
      !lemma.nil?
    end

  end

end
|
@@ -0,0 +1,352 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
# This file is part of the tcf2nif gem.
|
3
|
+
# Copyright (c) 2015 Peter Menke, SFB 673, Universität Bielefeld
|
4
|
+
# http://www.sfb673.org
|
5
|
+
#
|
6
|
+
# tcf2nif is free software: you can redistribute it and/or modify
|
7
|
+
# it under the terms of the GNU Lesser General Public License as
|
8
|
+
# published by the Free Software Foundation, either version 3 of
|
9
|
+
# the License, or (at your option) any later version.
|
10
|
+
#
|
11
|
+
# tcf2nif is distributed in the hope that it will be useful,
|
12
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
13
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
14
|
+
# GNU Lesser General Public License for more details.
|
15
|
+
#
|
16
|
+
# You should have received a copy of the GNU Lesser General Public
|
17
|
+
# License along with tcf2nif. If not, see
|
18
|
+
# <http://www.gnu.org/licenses/>.
|
19
|
+
|
20
|
+
module Tcf2Nif

  # Builds RDF graphs from a TCF document, using the NIF vocabulary and,
  # depending on the mode, PROV provenance triples or MOND module triples.
  #
  # The document handed to the constructor is read through these methods:
  # +text+, +tokens+, +dependency_map+, +named_entities+ and
  # +geo_annotations+. Tokens are expected to respond to +begin_index+,
  # +end_index+, +form+, +pos+, +pos?+, +lemma+ and +lemma?+.
  class Transformer

    # tcf_doc::  the TCF document to convert
    # rdf_opts:: stored in @rdf_opts; no method of this class reads it
    def initialize(tcf_doc, rdf_opts)
      @tcf_doc = tcf_doc
      @rdf_opts = rdf_opts
    end

    # URI addressing a character range of +base+ ("base#char=from,to").
    # +to+ may be an empty string, which yields an open-ended range.
    def char_uri(base, from, to)
      RDF::URI("#{base}#char=#{from},#{to}")
    end

    # URI made of +base+ and a fragment identifier +suffix+.
    def twopart_uri(base, suffix)
      RDF::URI("#{base}##{suffix}")
    end

    # Runs the transformation selected by +mode+ and returns the graph.
    #
    # mode:: :plain (default, NIF with provenance), :noprov (NIF only) or
    #        :modularized (NIF with MOND module assignments)
    #
    # Raises ArgumentError for any other mode. Previously an unknown mode
    # silently produced nil, which only surfaced later at the caller.
    def transform(mode = :plain)
      case mode
      when :plain       then transform_plain
      when :noprov      then transform_noprov
      when :modularized then transform_modularized
      else
        raise ArgumentError, "unknown transformation mode: #{mode.inspect}"
      end
    end

    # Base URI of the generated resources. Hard-coded example value.
    def uri_base
      'http://example.org/tcf2nif/example.txt'
    end

    # Handle URIs of the tools of the processing chain. None of the three is
    # referenced by the other methods of this class.
    def text_converter_uri
      RDF::URI('http://hdl.handle.net/11858/00-1778-0000-0004-BA56-7')
    end

    # NOTE(review): identical to text_converter_uri -- confirm that the
    # tokenizer really shares the converter's handle.
    def tokenizer_uri
      RDF::URI('http://hdl.handle.net/11858/00-1778-0000-0004-BA56-7')
    end

    def pos_tagger_uri
      RDF::URI('http://hdl.handle.net/11858/00-247C-0000-0007-3739-5')
    end

    # URIs of the processing activities, used as objects of
    # PROV.wasGeneratedBy.
    def tokenization_activity_uri
      twopart_uri(uri_base, 'TokenizationActivity')
    end

    def pos_tagging_activity_uri
      twopart_uri(uri_base, 'PosTaggingActivity')
    end

    def ne_tagging_activity_uri
      twopart_uri(uri_base, 'NeTaggingActivity')
    end

    def geo_tagging_activity_uri
      twopart_uri(uri_base, 'GeoTaggingActivity')
    end

    def dep_parsing_activity_uri
      twopart_uri(uri_base, 'DependencyParsingActivity')
    end

    # Timestamps of the processing activities, used as objects of
    # PROV.generatedAtTime. All values are hard-coded.
    def tokenization_activity_time
      timestamp('2015-07-09T14:01:00')
    end

    def pos_tagging_activity_time
      timestamp('2015-07-09T14:02:00')
    end

    def ne_tagging_activity_time
      timestamp('2015-07-09T14:03:00')
    end

    def geo_tagging_activity_time
      timestamp('2015-07-09T14:04:00')
    end

    def dep_parsing_activity_time
      timestamp('2015-07-09T14:05:00')
    end

    # Builds the NIF graph: the context (whole text), every token with its
    # POS and lemma data, and the dependency relations.
    #
    # reify:: when false (default), annotations are attached directly to the
    #         token URIs, and named entities and geo annotations are added.
    #         When true, POS, lemma and dependency data are emitted as
    #         separate annotation resources with provenance, and the graph
    #         is returned WITHOUT named-entity and geo triples.
    def transform_noprov(reify=false)
      graph = RDF::Graph.new
      context_uri = char_uri(uri_base, 0, '')

      add_context(graph, context_uri)

      @tcf_doc.tokens.each_with_index do |token, index|
        token_uri = token_uri_for(token)
        graph << [token_uri, NIF.referenceContext, context_uri]
        graph << [token_uri, RDF.type, NIF.String]
        graph << [token_uri, RDF.type, NIF.Word]
        graph << [token_uri, RDF.type, NIF.RFC5147String]
        add_token_offsets(graph, token, token_uri)

        # POS data is only emitted for tags with at least one word character.
        # TODO Tokens must be checked whether they contain strange characters!
        if token.pos? && token.pos =~ /\w+/
          nif_pos(token, index, reify).each { |triple| graph << triple }
        end
        if token.lemma?
          nif_lemma(token, index, reify).each { |triple| graph << triple }
        end
      end

      add_dependencies(graph, reify)

      # The reified graph is completed by transform_plain.
      return graph if reify

      add_named_entities(graph)
      add_geo_annotations(graph)
      graph
    end

    # Builds the reified NIF graph and adds PROV provenance for every token,
    # named entity and geo annotation.
    #
    # NOTE(review): because the reified base graph returns before named
    # entities and geo annotations are described, this mode emits their
    # provenance and token links but no taNerdCoreClassRef / coordinate
    # triples -- confirm whether that is intended.
    def transform_plain
      graph = transform_noprov(true)
      text_uri = char_uri(uri_base, 0, '')

      @tcf_doc.tokens.each do |token|
        token_uri = token_uri_for(token)
        graph << [token_uri, PROV.wasGeneratedBy, tokenization_activity_uri]
        graph << [token_uri, PROV.wasDerivedFrom, text_uri]
        graph << [token_uri, PROV.generatedAtTime, tokenization_activity_time]
      end

      add_annotation_provenance(graph, @tcf_doc.named_entities, 'ne',
                                ne_tagging_activity_uri, ne_tagging_activity_time)
      add_annotation_provenance(graph, @tcf_doc.geo_annotations, 'geo',
                                geo_tagging_activity_uri, geo_tagging_activity_time)
      graph
    end

    # Builds a graph in which context and tokens are assigned to MOND
    # modules. Token type triples are not emitted per token; the three types
    # are declared once on the tokenization module via MOND.propagateType.
    # POS and lemma modules are declared but nothing is assigned to them.
    def transform_modularized()
      graph = RDF::Graph.new
      context_uri = char_uri(uri_base, 0, '')

      pri_module_uri = twopart_uri(uri_base, 'PrimaryTextModule')
      tok_module_uri = twopart_uri(uri_base, 'TokenizationModule')
      pos_module_uri = twopart_uri(uri_base, 'PosModule')
      lem_module_uri = twopart_uri(uri_base, 'LemmaModule')

      [pri_module_uri, tok_module_uri, pos_module_uri, lem_module_uri].each do |module_uri|
        graph << [module_uri, RDF.type, MOND.Module]
        # NOTE(review): uri_base is a plain String here, not an RDF::URI --
        # confirm how the RDF library treats it as a statement object.
        graph << [module_uri, MOND.belongsToDocument, uri_base]
      end

      graph << [tok_module_uri, MOND.propagateType, NIF.String]
      graph << [tok_module_uri, MOND.propagateType, NIF.Word]
      graph << [tok_module_uri, MOND.propagateType, NIF.RFC5147String]

      add_context(graph, context_uri)
      graph << [context_uri, MOND.belongsToModule, pri_module_uri]

      @tcf_doc.tokens.each do |token|
        token_uri = token_uri_for(token)
        graph << [token_uri, NIF.referenceContext, context_uri]
        add_token_offsets(graph, token, token_uri)
        graph << [token_uri, MOND.belongsToModule, tok_module_uri]
      end

      graph
    end

    # Returns the triples describing the POS tag of +token+ as an array of
    # [subject, predicate, object] arrays.
    #
    # index::  number used in the annotation URI ("Pos<index>") when reifying
    # reify::  false: one oliaLink triple on the token itself;
    #          true: a separate annotation resource with provenance
    # tagset:: object mapping a tag to its RDF term through []
    def nif_pos(token, index, reify=false, tagset=Tcf2Nif::PENN)
      subject = token_uri_for(token)
      tag_term = tagset[token.pos]
      return [[subject, NIF.oliaLink, tag_term]] unless reify

      anno_uri = twopart_uri(uri_base, "Pos#{index}")
      [
        [subject, NIF.annotation, anno_uri],
        [anno_uri, NIF.oliaLink, tag_term],
        [anno_uri, PROV.wasGeneratedBy, pos_tagging_activity_uri],
        [anno_uri, PROV.wasDerivedFrom, subject],
        [anno_uri, PROV.generatedAtTime, pos_tagging_activity_time]
      ]
    end

    # Returns the triples describing the lemma of +token+, in the same two
    # shapes as nif_pos. The reified form attributes the lemma to the POS
    # tagging activity.
    def nif_lemma(token, index, reify=false)
      subject = token_uri_for(token)
      lemma = RDF::Literal.new(token.lemma, datatype: RDF::XSD.string)
      return [[subject, NIF.lemma, lemma]] unless reify

      anno_uri = twopart_uri(uri_base, "Lemma#{index}")
      [
        [subject, NIF.annotation, anno_uri],
        [anno_uri, NIF.lemma, lemma],
        [anno_uri, PROV.wasGeneratedBy, pos_tagging_activity_uri],
        [anno_uri, PROV.wasDerivedFrom, subject],
        [anno_uri, PROV.generatedAtTime, pos_tagging_activity_time]
      ]
    end

    private

    # xsd:dateTime literal for the given timestamp string.
    def timestamp(value)
      RDF::Literal.new(value, datatype: RDF::XSD.dateTime)
    end

    # xsd:nonNegativeInteger literal for a character offset.
    def index_literal(value)
      RDF::Literal.new(value, datatype: RDF::XSD.nonNegativeInteger)
    end

    # Character-range URI of a single token.
    def token_uri_for(token)
      char_uri(uri_base, token.begin_index, token.end_index)
    end

    # Character-range URI covering all +tokens+, from the smallest begin
    # index to the largest end index. min_by / max_by are required here:
    # min / max with a one-argument block treat the block value as a
    # comparison result and pick the wrong token.
    def span_uri(tokens)
      first_index = tokens.min_by { |t| t.begin_index }.begin_index
      last_index = tokens.max_by { |t| t.end_index }.end_index
      char_uri(uri_base, first_index, last_index)
    end

    # Adds the triples describing the whole primary text.
    def add_context(graph, context_uri)
      graph << [context_uri, RDF.type, NIF.String]
      graph << [context_uri, RDF.type, NIF.Context]
      graph << [context_uri, RDF.type, NIF.RFC5147String]
      graph << [context_uri, NIF.isString, RDF::Literal.new(@tcf_doc.text, lang: :en)]
      graph << [context_uri, NIF.beginIndex, index_literal(0)]
      graph << [context_uri, NIF.endIndex, index_literal(@tcf_doc.text.length)]
    end

    # Adds begin index, end index and surface form of a token.
    def add_token_offsets(graph, token, token_uri)
      graph << [token_uri, NIF.beginIndex, index_literal(token.begin_index)]
      graph << [token_uri, NIF.endIndex, index_literal(token.end_index)]
      graph << [token_uri, NIF.anchorOf, RDF::Literal.new(token.form, datatype: RDF::XSD.string)]
    end

    # Adds one dependency relation per entry of the document's dependency
    # map. Each key is a pair whose first element is the dependent token and
    # whose last element is the governor; the value is the relation label.
    # Reified annotations are numbered from 1 ("Dep1", "Dep2", ...).
    def add_dependencies(graph, reify)
      @tcf_doc.dependency_map.each_with_index do |(pair, relation), offset|
        dep_uri = token_uri_for(pair.first)
        gov_uri = token_uri_for(pair.last)
        label = RDF::Literal.new(relation)

        if reify
          anno_uri = twopart_uri(uri_base, "Dep#{offset + 1}")
          graph << [dep_uri, NIF.annotation, anno_uri]
          graph << [anno_uri, NIF.dependency, gov_uri]
          graph << [anno_uri, NIF.dependencyRelationType, label]
          graph << [anno_uri, PROV.wasGeneratedBy, dep_parsing_activity_uri]
          graph << [anno_uri, PROV.wasDerivedFrom, dep_uri]
          graph << [anno_uri, PROV.wasDerivedFrom, gov_uri]
          graph << [anno_uri, PROV.generatedAtTime, dep_parsing_activity_time]
        else
          graph << [dep_uri, NIF.dependency, gov_uri]
          graph << [dep_uri, NIF.dependencyRelationType, label]
        end
      end
    end

    # Adds one annotation resource ("ne<i>") per named entity, attached to
    # the character range spanned by the entity's tokens.
    def add_named_entities(graph)
      @tcf_doc.named_entities.each_with_index do |ne, index|
        target_uri = span_uri(ne.tokens)
        anno_uri = twopart_uri(uri_base, "ne#{index}")
        graph << [target_uri, NIF.annotation, anno_uri]
        graph << [anno_uri, RDF.type, NIF.String]
        graph << [anno_uri, NIF.taNerdCoreClassRef, NERD[ne.category.capitalize]]
      end
    end

    # Adds one annotation resource ("geo<i>") per geo annotation, carrying
    # latitude, longitude, altitude and continent as given by the document.
    def add_geo_annotations(graph)
      @tcf_doc.geo_annotations.each_with_index do |geo, index|
        target_uri = span_uri(geo.tokens)
        anno_uri = twopart_uri(uri_base, "geo#{index}")
        graph << [target_uri, RDF.type, NIF.String]
        graph << [target_uri, NIF.annotation, anno_uri]
        graph << [anno_uri, GEO.lat, geo.lat]
        graph << [anno_uri, GEO.long, geo.lon]
        graph << [anno_uri, GEO.alt, geo.alt]
        graph << [anno_uri, RDF::URI('http://example.org/tcf2nif/continent'), geo.continent]
      end
    end

    # Adds provenance for a list of token-based annotations. The annotation
    # URIs are "<prefix><i>"; each annotation is linked to the activity that
    # generated it and, in both directions, to every token it covers.
    def add_annotation_provenance(graph, annotations, prefix, activity_uri, activity_time)
      annotations.each_with_index do |annotation, index|
        anno_uri = twopart_uri(uri_base, "#{prefix}#{index}")
        graph << [anno_uri, PROV.wasGeneratedBy, activity_uri]
        annotation.tokens.each do |tok|
          tok_uri = token_uri_for(tok)
          graph << [anno_uri, PROV.wasDerivedFrom, tok_uri]
          graph << [tok_uri, NIF.annotation, anno_uri]
        end
        graph << [anno_uri, PROV.generatedAtTime, activity_time]
      end
    end

  end

end
|
data/tcf2nif.gemspec
ADDED
@@ -0,0 +1,40 @@
|
|
1
|
+
# coding: utf-8
# Gem specification for tcf2nif.

# Put the gem's own lib directory on the load path so that the version
# constant can be required without the gem being installed.
lib = File.expand_path('../lib', __FILE__)
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
require 'tcf2nif/version'

Gem::Specification.new do |spec|
  spec.name          = "tcf2nif"
  spec.version       = Tcf2Nif::VERSION
  spec.authors       = ["Peter Menke"]
  spec.email         = ["pmenke@googlemail.com"]

  spec.summary       = %q{A small NLP data converter from the TCF to the NIF format}
  spec.description   = %q{tcf2nif converts NLP data from the TCF format (used by WebLicht) to the RDF-based NIF format.}
  spec.homepage      = "http://github.com/pmenke/tcf2nif"

  # NOTE(review): no license is declared here although the source headers
  # state LGPL v3 or later -- consider setting spec.license.

  # Specifications without metadata support are rejected outright. No push
  # restriction is configured; to restrict pushes, set 'allowed_push_host':
  # spec.metadata['allowed_push_host'] = "TODO: Set to 'http://mygemserver.com'"
  unless spec.respond_to?(:metadata)
    raise "RubyGems 2.0 or newer is required to protect against public gem pushes."
  end

  # Package every file tracked by git except tests, specs and features.
  spec.files         = `git ls-files -z`.split("\x0").reject { |f| f.match(%r{^(test|spec|features)/}) }
  spec.bindir        = "exe"
  # Every file below exe/ becomes an executable of the gem.
  spec.executables   = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
  spec.require_paths = ["lib"]

  spec.add_development_dependency "bundler", "~> 1.9"
  spec.add_development_dependency "rake", "~> 10.0"
  spec.add_development_dependency "rspec"
  spec.add_development_dependency "spork"
  spec.add_development_dependency "simplecov"

  spec.add_runtime_dependency 'rdf', '~> 1.1'
  spec.add_runtime_dependency 'rdf-turtle', '~> 1.1'
  spec.add_runtime_dependency 'nokogiri', '~> 1.6'
  spec.add_runtime_dependency 'trollop' # , '~> 1.6'

end
|
metadata
ADDED
@@ -0,0 +1,200 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: tcf2nif
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.2.1
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Peter Menke
|
8
|
+
autorequire:
|
9
|
+
bindir: exe
|
10
|
+
cert_chain: []
|
11
|
+
date: 2015-10-30 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: bundler
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - "~>"
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '1.9'
|
20
|
+
type: :development
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - "~>"
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '1.9'
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: rake
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - "~>"
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '10.0'
|
34
|
+
type: :development
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - "~>"
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '10.0'
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: rspec
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - ">="
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: '0'
|
48
|
+
type: :development
|
49
|
+
prerelease: false
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - ">="
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: '0'
|
55
|
+
- !ruby/object:Gem::Dependency
|
56
|
+
name: spork
|
57
|
+
requirement: !ruby/object:Gem::Requirement
|
58
|
+
requirements:
|
59
|
+
- - ">="
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: '0'
|
62
|
+
type: :development
|
63
|
+
prerelease: false
|
64
|
+
version_requirements: !ruby/object:Gem::Requirement
|
65
|
+
requirements:
|
66
|
+
- - ">="
|
67
|
+
- !ruby/object:Gem::Version
|
68
|
+
version: '0'
|
69
|
+
- !ruby/object:Gem::Dependency
|
70
|
+
name: simplecov
|
71
|
+
requirement: !ruby/object:Gem::Requirement
|
72
|
+
requirements:
|
73
|
+
- - ">="
|
74
|
+
- !ruby/object:Gem::Version
|
75
|
+
version: '0'
|
76
|
+
type: :development
|
77
|
+
prerelease: false
|
78
|
+
version_requirements: !ruby/object:Gem::Requirement
|
79
|
+
requirements:
|
80
|
+
- - ">="
|
81
|
+
- !ruby/object:Gem::Version
|
82
|
+
version: '0'
|
83
|
+
- !ruby/object:Gem::Dependency
|
84
|
+
name: rdf
|
85
|
+
requirement: !ruby/object:Gem::Requirement
|
86
|
+
requirements:
|
87
|
+
- - "~>"
|
88
|
+
- !ruby/object:Gem::Version
|
89
|
+
version: '1.1'
|
90
|
+
type: :runtime
|
91
|
+
prerelease: false
|
92
|
+
version_requirements: !ruby/object:Gem::Requirement
|
93
|
+
requirements:
|
94
|
+
- - "~>"
|
95
|
+
- !ruby/object:Gem::Version
|
96
|
+
version: '1.1'
|
97
|
+
- !ruby/object:Gem::Dependency
|
98
|
+
name: rdf-turtle
|
99
|
+
requirement: !ruby/object:Gem::Requirement
|
100
|
+
requirements:
|
101
|
+
- - "~>"
|
102
|
+
- !ruby/object:Gem::Version
|
103
|
+
version: '1.1'
|
104
|
+
type: :runtime
|
105
|
+
prerelease: false
|
106
|
+
version_requirements: !ruby/object:Gem::Requirement
|
107
|
+
requirements:
|
108
|
+
- - "~>"
|
109
|
+
- !ruby/object:Gem::Version
|
110
|
+
version: '1.1'
|
111
|
+
- !ruby/object:Gem::Dependency
|
112
|
+
name: nokogiri
|
113
|
+
requirement: !ruby/object:Gem::Requirement
|
114
|
+
requirements:
|
115
|
+
- - "~>"
|
116
|
+
- !ruby/object:Gem::Version
|
117
|
+
version: '1.6'
|
118
|
+
type: :runtime
|
119
|
+
prerelease: false
|
120
|
+
version_requirements: !ruby/object:Gem::Requirement
|
121
|
+
requirements:
|
122
|
+
- - "~>"
|
123
|
+
- !ruby/object:Gem::Version
|
124
|
+
version: '1.6'
|
125
|
+
- !ruby/object:Gem::Dependency
|
126
|
+
name: trollop
|
127
|
+
requirement: !ruby/object:Gem::Requirement
|
128
|
+
requirements:
|
129
|
+
- - ">="
|
130
|
+
- !ruby/object:Gem::Version
|
131
|
+
version: '0'
|
132
|
+
type: :runtime
|
133
|
+
prerelease: false
|
134
|
+
version_requirements: !ruby/object:Gem::Requirement
|
135
|
+
requirements:
|
136
|
+
- - ">="
|
137
|
+
- !ruby/object:Gem::Version
|
138
|
+
version: '0'
|
139
|
+
description: tcf2nif converts NLP data from the TCF format (used by WebLicht) to the
|
140
|
+
RDF-based NIF format.
|
141
|
+
email:
|
142
|
+
- pmenke@googlemail.com
|
143
|
+
executables:
|
144
|
+
- convpar
|
145
|
+
- createturtle
|
146
|
+
- tcf2nif
|
147
|
+
- txt2tcf
|
148
|
+
extensions: []
|
149
|
+
extra_rdoc_files: []
|
150
|
+
files:
|
151
|
+
- ".gitignore"
|
152
|
+
- ".gitlab-ci.yml"
|
153
|
+
- ".rspec"
|
154
|
+
- ".travis.yml"
|
155
|
+
- CODE_OF_CONDUCT.md
|
156
|
+
- Gemfile
|
157
|
+
- LICENSE.txt
|
158
|
+
- README.md
|
159
|
+
- Rakefile
|
160
|
+
- Tcf2Nif.ipynb
|
161
|
+
- bin/console
|
162
|
+
- bin/setup
|
163
|
+
- exe/convpar
|
164
|
+
- exe/createturtle
|
165
|
+
- exe/tcf2nif
|
166
|
+
- exe/txt2tcf
|
167
|
+
- lib/tcf2nif.rb
|
168
|
+
- lib/tcf2nif/annotation.rb
|
169
|
+
- lib/tcf2nif/bounded_element.rb
|
170
|
+
- lib/tcf2nif/geo_annotation.rb
|
171
|
+
- lib/tcf2nif/named_entity_annotation.rb
|
172
|
+
- lib/tcf2nif/tcf_document.rb
|
173
|
+
- lib/tcf2nif/token.rb
|
174
|
+
- lib/tcf2nif/transformer.rb
|
175
|
+
- lib/tcf2nif/version.rb
|
176
|
+
- tcf2nif.gemspec
|
177
|
+
homepage: http://github.com/pmenke/tcf2nif
|
178
|
+
licenses: []
|
179
|
+
metadata: {}
|
180
|
+
post_install_message:
|
181
|
+
rdoc_options: []
|
182
|
+
require_paths:
|
183
|
+
- lib
|
184
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
185
|
+
requirements:
|
186
|
+
- - ">="
|
187
|
+
- !ruby/object:Gem::Version
|
188
|
+
version: '0'
|
189
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
190
|
+
requirements:
|
191
|
+
- - ">="
|
192
|
+
- !ruby/object:Gem::Version
|
193
|
+
version: '0'
|
194
|
+
requirements: []
|
195
|
+
rubyforge_project:
|
196
|
+
rubygems_version: 2.4.6
|
197
|
+
signing_key:
|
198
|
+
specification_version: 4
|
199
|
+
summary: A small NLP data converter from the TCF to the NIF format
|
200
|
+
test_files: []
|