opener-tree-tagger 2.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/README.md +150 -0
- data/bin/opener-tree-tagger-daemon +7 -0
- data/bin/opener-tree-tagger-server +11 -0
- data/bin/tree-tagger +7 -0
- data/config.ru +5 -0
- data/core/dutch.map.treetagger.kaf.csv +40 -0
- data/core/english.map.treetagger.kaf.csv +36 -0
- data/core/french.map.treetagger.kaf.csv +33 -0
- data/core/german.map.treetagger.kaf.csv +52 -0
- data/core/italian.map.treetagger.kaf.csv +38 -0
- data/core/site-packages/pre_build/VUKafParserPy-1.0-py2.7.egg-info/PKG-INFO +10 -0
- data/core/site-packages/pre_build/VUKafParserPy-1.0-py2.7.egg-info/SOURCES.txt +7 -0
- data/core/site-packages/pre_build/VUKafParserPy-1.0-py2.7.egg-info/dependency_links.txt +1 -0
- data/core/site-packages/pre_build/VUKafParserPy-1.0-py2.7.egg-info/installed-files.txt +11 -0
- data/core/site-packages/pre_build/VUKafParserPy-1.0-py2.7.egg-info/top_level.txt +1 -0
- data/core/site-packages/pre_build/VUKafParserPy/KafDataObjectsMod.py +165 -0
- data/core/site-packages/pre_build/VUKafParserPy/KafDataObjectsMod.pyc +0 -0
- data/core/site-packages/pre_build/VUKafParserPy/KafParserMod.py +439 -0
- data/core/site-packages/pre_build/VUKafParserPy/KafParserMod.pyc +0 -0
- data/core/site-packages/pre_build/VUKafParserPy/__init__.py +7 -0
- data/core/site-packages/pre_build/VUKafParserPy/__init__.pyc +0 -0
- data/core/spanish.map.treetagger.kaf.csv +75 -0
- data/core/token_matcher.py +82 -0
- data/core/tt_from_kaf_to_kaf.py +215 -0
- data/exec/tree-tagger.rb +9 -0
- data/ext/hack/Rakefile +13 -0
- data/ext/hack/support.rb +38 -0
- data/lib/opener/tree_tagger.rb +69 -0
- data/lib/opener/tree_tagger/cli.rb +69 -0
- data/lib/opener/tree_tagger/public/markdown.css +284 -0
- data/lib/opener/tree_tagger/server.rb +16 -0
- data/lib/opener/tree_tagger/version.rb +5 -0
- data/lib/opener/tree_tagger/views/index.erb +96 -0
- data/lib/opener/tree_tagger/views/result.erb +15 -0
- data/opener-tree-tagger.gemspec +35 -0
- data/pre_build_requirements.txt +1 -0
- metadata +197 -0
@@ -0,0 +1,75 @@
|
|
1
|
+
ACRNM O acronym (ISO, CEI)
|
2
|
+
ADJ G Adjectives (mayores, mayor)
|
3
|
+
ADV A Adverbs (muy, demasiado, cómo)
|
4
|
+
ALFP O Plural letter of the alphabet (As/Aes, bes)
|
5
|
+
ALFS O Singular letter of the alphabet (A, b)
|
6
|
+
ART D Articles (un, las, la, unas)
|
7
|
+
BACKSLASH O backslash (\)
|
8
|
+
CARD O Cardinals
|
9
|
+
CC C Coordinating conjunction (y, o)
|
10
|
+
CCAD C Adversative coordinating conjunction (pero)
|
11
|
+
CCNEG C Negative coordinating conjunction (ni)
|
12
|
+
CM O comma (,)
|
13
|
+
CODE O Alphanumeric code
|
14
|
+
COLON O colon (:)
|
15
|
+
CQUE C que (as conjunction)
|
16
|
+
CSUBF C Subordinating conjunction that introduces finite clauses (apenas)
|
17
|
+
CSUBI C Subordinating conjunction that introduces infinite clauses (al)
|
18
|
+
CSUBX C Subordinating conjunction underspecified for subord-type (aunque)
|
19
|
+
DASH O dash (-)
|
20
|
+
DM Q Demonstrative pronouns (ésas, ése, esta)
|
21
|
+
DOTS O POS tag for "..."
|
22
|
+
FO O Formula
|
23
|
+
FS O Full stop punctuation marks
|
24
|
+
INT Q Interrogative pronouns (quiénes, cuántas, cuánto)
|
25
|
+
ITJN O Interjection (oh, ja)
|
26
|
+
LP O left parenthesis ("(", "[")
|
27
|
+
NC N Common nouns (mesas, mesa, libro, ordenador)
|
28
|
+
NEG O Negation
|
29
|
+
NMEA N measure noun (metros, litros)
|
30
|
+
NMON N month name
|
31
|
+
NP R Proper nouns
|
32
|
+
ORD O Ordinals (primer, primeras, primera)
|
33
|
+
PAL O Portmanteau word formed by a and el
|
34
|
+
PDEL O Portmanteau word formed by de and el
|
35
|
+
PE O Foreign word
|
36
|
+
PERCT O percent sign (%)
|
37
|
+
PNC O Unclassified word
|
38
|
+
PPC Q Clitic personal pronoun (le, les)
|
39
|
+
PPO Q Possessive pronouns (mi, su, sus)
|
40
|
+
PPX Q Clitics and personal pronouns (nos, me, nosotras, te, sí)
|
41
|
+
PREP O Negative preposition (sin)
|
42
|
+
PREP O Preposition
|
43
|
+
PREP/DEL O Complex preposition "después del"
|
44
|
+
QT O quotation symbol (" ' `)
|
45
|
+
QU O Quantifiers (sendas, cada)
|
46
|
+
REL Q Relative pronouns (cuyas, cuyo)
|
47
|
+
RP O right parenthesis (")", "]")
|
48
|
+
SE O Se (as particle)
|
49
|
+
SEMICOLON O semicolon (;)
|
50
|
+
SLASH O slash (/)
|
51
|
+
SYM O Symbols
|
52
|
+
UMMX N measure unit (MHz, km, mA)
|
53
|
+
VCLIger V clitic gerund verb
|
54
|
+
VCLIinf V clitic infinitive verb
|
55
|
+
VCLIfin V clitic finite verb
|
56
|
+
VEadj V Verb estar. Past participle
|
57
|
+
VEfin V Verb estar. Finite
|
58
|
+
VEger V Verb estar. Gerund
|
59
|
+
VEinf V Verb estar. Infinitive
|
60
|
+
VHadj V Verb haber. Past participle
|
61
|
+
VHfin V Verb haber. Finite
|
62
|
+
VHger V Verb haber. Gerund
|
63
|
+
VHinf V Verb haber. Infinitive
|
64
|
+
VLadj V Lexical verb. Past participle
|
65
|
+
VLfin V Lexical verb. Finite
|
66
|
+
VLger V Lexical verb. Gerund
|
67
|
+
VLinf V Lexical verb. Infinitive
|
68
|
+
VMadj V Modal verb. Past participle
|
69
|
+
VMfin V Modal verb. Finite
|
70
|
+
VMger V Modal verb. Gerund
|
71
|
+
VMinf V Modal verb. Infinitive
|
72
|
+
VSadj V Verb ser. Past participle
|
73
|
+
VSfin V Verb ser. Finite
|
74
|
+
VSger V Verb ser. Gerund
|
75
|
+
VSinf V Verb ser. Infinitive
|
@@ -0,0 +1,82 @@
|
|
1
|
+
#!/usr/bin/env python
|
2
|
+
|
3
|
+
|
4
|
+
#####
|
5
|
+
# 4-Mar-2013 : modified order of rules to check first if there is a merge and then if it is an extra token
|
6
|
+
# because of this case, where can be both: [ .. . ] [ . . . ]
|
7
|
+
|
8
|
+
|
9
|
+
def add_match(d,id_new,id_ref):
    ## Record that the new token id_new is aligned with the reference
    ## token id_ref, accumulating multiple reference ids per new id.
    d.setdefault(id_new, []).append(id_ref)
|
14
|
+
|
15
|
+
|
16
|
+
def token_matcher(l_ref,l_new,super_d):
    ## Align two tokenisations of the same text.
    ##
    ## l_ref and l_new are lists of (token_text, token_id) pairs. The
    ## function recursively walks both lists and fills super_d (via
    ## add_match) mapping each new-token id to the list of reference-token
    ## ids it covers. Both input lists are consumed/mutated along the way
    ## (leading elements are rewritten when a token is partially matched).
    debug = False
    if debug:
        print l_ref
        print l_new
    # Recursion bottoms out when the new tokenisation is exhausted; any
    # remaining reference tokens are simply left unmatched.
    if len(l_new)==0:
        return
    else:
        token_ref, id_ref = l_ref[0]
        token_new, id_new = l_new[0]
        if token_ref == token_new:
            # Exact match: advance both lists.
            if debug: print 'Matching ',l_ref[0],l_new[0]
            if debug: print 'A',l_ref[0],l_new[0]
            add_match(super_d,id_new,id_ref)
            token_matcher(l_ref[1:],l_new[1:],super_d)
        else:
            # Per the 4-Mar-2013 changelog, split/merge are checked before
            # the extra-token rules because a case can satisfy both.
            if token_ref.startswith(token_new) : ##There was an split
                # The reference token was split into several new tokens:
                # consume the matched prefix and keep the remainder as the
                # new head of l_ref.
                if debug: print 'D'
                aux = (token_ref[len(token_new):],id_ref)
                l_ref[0]=aux

                add_match(super_d,id_new,id_ref)
                token_matcher(l_ref,l_new[1:],super_d)

            elif token_new.startswith(token_ref) : ##There was a merge
                # Several reference tokens were merged into one new token.
                # NOTE(review): the +1 here drops one extra character after
                # the matched prefix (presumably a separator swallowed by
                # the merge), while the split branch above slices without
                # +1 — confirm this asymmetry is intended.
                if debug: print 'E'
                aux = (token_new[len(token_ref)+1:],id_new)
                l_new[0]=aux
                add_match(super_d,id_new,id_ref)
                token_matcher(l_ref[1:],l_new,super_d)

            elif len(l_new)>1 and l_new[1][0]==token_ref: ## There is an extra token in l_new
                # Skip the extra new token; it gets no mapping entry.
                if debug: print 'B',l_new[1][0],token_ref
                token_matcher(l_ref[0:],l_new[1:],super_d)

            elif len(l_ref)>1 and l_ref[1][0] == token_new: ## There is an extra token in l_ref
                # Skip the extra reference token; it is left unmatched.
                if debug: print 'C',l_ref[1:],l_new[0:]
                token_matcher(l_ref[1:],l_new[0:],super_d)

            else: ## Impossible matching
                # Give up on this pair and resynchronise one step further
                # into both lists.
                if debug: print 'F'
                if debug: print 'Impossible match of ',l_new[0],l_ref[0]
                token_matcher(l_ref[1:],l_new[1:],super_d)
|
62
|
+
|
63
|
+
|
64
|
+
if __name__ == '__main__':
    # Manual self-test: align two tokenisations of the same sentence that
    # disagree on token boundaries ('Th is' vs 'This', 'simple' vs
    # 'sim ple'), then print the resulting id mapping.
    l1 = []
    s1 = 'Beatrix Wilhelmina Armgard van Oranje -Nassau (Baarn , 31 januari 1938 ) is sinds 30 april 1980 koningin van het Koninkrijk der Nederlanden'

    # NOTE(review): the assignment above is dead code — it is immediately
    # overwritten by the test sentence below.
    s1 = 'Th is is a very simple example'
    for n,t in enumerate(s1.split(' ')):
        l1.append((t,'id'+str(n)))

    l2 = []
    #s2 = 'Beatrix Wilhelmina Armgard van Oranje -Nassau ( Baarn , 31 januari 1938 ) is sinds 30 april 1980 koningin van het Koninkrijk der Nederlanden'
    s2 = 'This is a very sim ple example'
    for n,t in enumerate(s2.split(' ')):
        l2.append((t,'id'+str(n)))

    # Maps each l2 token id to the l1 token ids it covers. Note that
    # token_matcher mutates l1/l2, so the prints below show the consumed
    # state, not the original input.
    super_d = {}
    token_matcher(l1,l2,super_d)
    print l1
    print l2
    print super_d
|
@@ -0,0 +1,215 @@
|
|
1
|
+
#!/usr/bin/env python
#-*- coding: utf8 *-*
__version__ = '1.2 4-Mar-2013'

## Last changes
# 1-Mar-2013 --> now it works with UTF-8 !!!
# 4-Mar-2013 --> added code for including the element in the linguistic processors header
# 5-Mar-2013 --> language is not a parameter, is read from the input KAF
# 9-dec-2013 --> the postagger avoids 2 terms with the same tokenid span, like 's --> ' and s
# 11-mar-2014 --> fixed problem when merge with token_matcher
###################################


import sys
import os

this_folder = os.path.dirname(os.path.realpath(__file__))

# This updates the load path to ensure that the local site-packages directory
# can be used to load packages (e.g. a locally installed copy of lxml).
sys.path.append(os.path.join(this_folder, 'site-packages/pre_build'))

import operator
import time
import getopt
import string
import subprocess
import lxml
from lxml import etree
from lxml.etree import ElementTree as ET, Element as EL, PI
from VUKafParserPy.KafParserMod import KafParser
from token_matcher import token_matcher


# Fail fast when the tagger location is not configured: every language
# branch in the main block builds its command from this path.
if not os.environ.get('TREE_TAGGER_PATH'):
    # Fixed typo in the user-facing message: "environent" -> "environment".
    print>>sys.stderr,"TREE_TAGGER_PATH environment variable not found. Please set the full path to your tree tagger in the TREE_TAGGER_PATH environment variable."
    sys.exit(-1)

complete_path_to_treetagger = os.environ.get('TREE_TAGGER_PATH')
|
41
|
+
|
42
|
+
|
43
|
+
def loadMapping(mapping_file):
    ## Load the TreeTagger -> KAF part-of-speech mapping from a whitespace
    ## separated file (first column: TreeTagger tag, second column: KAF pos
    ## letter). Relative paths are resolved against this script's folder.
    ##
    ## Returns a dict {treetagger_tag: kaf_pos}.
    mapping = {}
    filename = os.path.join(os.path.dirname(__file__), mapping_file)
    ## "with" guarantees the handle is closed even on error (the original
    ## leaked it if a line raised); the local is renamed so the builtin
    ## map() is no longer shadowed.
    with open(filename) as fic:
        for line in fic:
            fields = line.strip().split()
            # Skip blank or malformed lines instead of raising IndexError.
            if len(fields) >= 2:
                mapping[fields[0]] = fields[1]
    return mapping
|
52
|
+
|
53
|
+
|
54
|
+
|
55
|
+
if __name__=='__main__':
    # Kernel entry point: reads a tokenised KAF document on stdin, runs
    # TreeTagger per sentence, and writes the KAF with a terms layer added
    # to stdout.
    this_folder = os.path.dirname(os.path.realpath(__file__))

    # This program is strictly a filter; refuse to run on a terminal.
    if sys.stdin.isatty():
        print>>sys.stderr,'Input stream required.'
        print>>sys.stderr,'Example usage: cat myUTF8file.kaf |',sys.argv[0]
        sys.exit(-1)

    # --no-time suppresses the timestamp in the linguisticProcessors
    # header (useful for reproducible output).
    time_stamp = True
    try:
        opts, args = getopt.getopt(sys.argv[1:],"l:",["no-time"])
        for opt, arg in opts:
            if opt == "--no-time":
                time_stamp = False
    except getopt.GetoptError:
        # Unknown options are deliberately ignored; defaults apply.
        pass


    # The language is read from the KAF header, not from the command line
    # (changelog entry of 5-Mar-2013).
    input_kaf = KafParser(sys.stdin)
    my_lang = input_kaf.getLanguage()


    # Pick the TreeTagger command, tagset-to-KAF mapping file and model
    # label for the document language.
    if my_lang == 'en':
        treetagger_cmd = complete_path_to_treetagger+'/cmd/tree-tagger-english-utf8'
        mapping_file = this_folder +'/english.map.treetagger.kaf.csv'
        model = 'English models'
    elif my_lang == 'nl':
        treetagger_cmd = complete_path_to_treetagger+'/cmd/tree-tagger-dutch-utf8'
        mapping_file = this_folder +'/dutch.map.treetagger.kaf.csv'
        model = 'Dutch models'
    elif my_lang == 'de':
        treetagger_cmd = complete_path_to_treetagger+'/cmd/tree-tagger-german-utf8'
        mapping_file = this_folder +'/german.map.treetagger.kaf.csv'
        model = 'German models'
    elif my_lang == 'fr':
        treetagger_cmd = complete_path_to_treetagger+'/cmd/tree-tagger-french-utf8'
        mapping_file = this_folder +'/french.map.treetagger.kaf.csv'
        model = 'French models'
    elif my_lang == 'it':
        treetagger_cmd = complete_path_to_treetagger+'/cmd/tree-tagger-italian-utf8'
        mapping_file = this_folder +'/italian.map.treetagger.kaf.csv'
        model = 'Italian models'
    elif my_lang == 'es':
        treetagger_cmd = complete_path_to_treetagger+'/cmd/tree-tagger-spanish-utf8'
        mapping_file = this_folder +'/spanish.map.treetagger.kaf.csv'
        model = 'Spanish models'
    else: ## Default is dutch
        treetagger_cmd = complete_path_to_treetagger+'/cmd/tree-tagger-dutch-utf8'
        mapping_file = this_folder +'/dutch.map.treetagger.kaf.csv'
        model = 'Dutch models'

    map_tt_to_kaf = loadMapping(mapping_file)


    ## Group the KAF tokens into sentences: getTokens() yields
    ## (word, sentence_id, token_id) triples; a change of sentence id
    ## starts a new group.
    # NOTE(review): reference_tokens is never used below.
    reference_tokens = []
    sentences = []
    prev_sent='-200'
    aux = []
    for word, sent_id, w_id in input_kaf.getTokens():
        if sent_id != prev_sent:
            if len(aux) != 0:
                sentences.append(aux)
                aux = []
        aux.append((word,w_id))

        prev_sent = sent_id
    # Flush the trailing sentence.
    if len(aux)!=0:
        sentences.append(aux)


    for sentence in sentences:
        #print>>sys.stderr,'Input sentnece:',sentence
        # TreeTagger consumes the raw space-joined sentence as UTF-8.
        text = ' '.join(t.encode('utf-8') for t,_ in sentence)

        if not os.path.isfile(treetagger_cmd):
            print>>sys.stderr, "Can't find the proper tree tagger command: " +treetagger_cmd
            raise IOError(treetagger_cmd)
        # One tagger process per sentence; output is read via communicate
        # so the pipe cannot deadlock.
        try:
            tt_proc = subprocess.Popen(treetagger_cmd,stdin=subprocess.PIPE, stdout=subprocess.PIPE,stderr=subprocess.PIPE)
        except Exception as e:
            # NOTE(review): only the message is printed here; the
            # tt_proc.communicate below would then raise NameError.
            print>>sys.stderr,str(e)

        out, err = tt_proc.communicate(text)

        #print>>sys.stderr,'Output treetagger',out
        # Parse TreeTagger's tab-separated "token POS lemma" lines into
        # term candidates; ids are t_0, t_1, ... per sentence position.
        data = {}
        new_tokens = []
        for n,line in enumerate(out.splitlines()):
            line = line.decode('utf-8')
            my_id='t_'+str(n)
            token,pos,lemma = line.strip().split('\t')
            # Unmapped TreeTagger tags default to 'O' (other).
            pos_kaf = map_tt_to_kaf.get(pos,'O')

            if lemma=='<unknown>':
                # Fall back to the surface form and flag the morphofeat.
                lemma=token
                pos+=' unknown_lemma'
            # Open-class KAF pos letters; everything else is closed class.
            if pos_kaf in ['N','R','G','V','A','O']:
                type_term = 'open'
            else:
                type_term = 'close'
            data[my_id] = (token,pos_kaf,lemma,type_term,pos)
            new_tokens.append((token,my_id))
        #tt_proc.terminate()

        # Align TreeTagger's tokenisation with the KAF tokenisation:
        # mapping_tokens maps each t_N id to the KAF token ids it covers.
        mapping_tokens = {}
        #print
        #print 'SENTENCE',sentence
        #print 'New=tokens',new_tokens
        token_matcher(sentence,new_tokens,mapping_tokens)
        #print mapping_tokens
        #print
        # Build (term, span) tuples and the reverse map from KAF token id
        # to the TreeTagger term ids that touch it.
        new_terms = []
        terms_for_token = {}
        for token_new, id_new in new_tokens:
            token,pos_kaf,lemma,type_term,pos = data[id_new]
            ref_tokens = mapping_tokens[id_new]
            span = []
            #print token_new, id_new, ref_tokens
            for ref_token in ref_tokens:
                span.append(ref_token)
                if ref_token in terms_for_token:
                    terms_for_token[ref_token].append(id_new)
                else:
                    terms_for_token[ref_token] = [id_new]

            new_terms.append((id_new,type_term,pos_kaf,pos,lemma,span))


        #print terms_for_token
        # Emit one <term> per term, collapsing the case where several
        # TreeTagger terms share one KAF token (e.g. "'s" split into "'"
        # and "s", changelog 9-dec-2013): their lemmas are concatenated
        # and the duplicates are suppressed via not_use.
        not_use = set()
        for id_new,type_term,pos_kaf,pos,lemma,span in new_terms:
            #print not_use
            #print id_new
            if id_new not in not_use:
                new_lemma = ''
                for tokenid in span:
                    if len(terms_for_token[tokenid]) > 1:
                        new_lemma += (''.join(data[t][2] for t in terms_for_token[tokenid])).lower()
                        not_use |= set(terms_for_token[tokenid])
                if new_lemma != '':
                    lemma = new_lemma

                ###############
                # Append the term element with its token span to the KAF
                # terms layer.
                ele_term = EL('term',attrib={'tid':id_new,
                                             'type':type_term,
                                             'pos':pos_kaf,
                                             'morphofeat':pos,
                                             'lemma':lemma})
                ele_span = EL('span')
                for ref_token in span:
                    eleTarget = EL('target',attrib={'id':ref_token})
                    ele_span.append(eleTarget)
                ele_term.append(ele_span)
                input_kaf.addElementToLayer('terms', ele_term)
    ##End for each sentence

    # Record this processor in the KAF header and write the result.
    input_kaf.addLinguisticProcessor('TreeTagger_from_kaf '+model,'1.0', 'term', time_stamp)
    input_kaf.saveToFile(sys.stdout)
|
214
|
+
|
215
|
+
|
data/exec/tree-tagger.rb
ADDED
data/ext/hack/Rakefile
ADDED
@@ -0,0 +1,13 @@
|
|
1
|
+
require 'rake'
require_relative 'support'

desc 'Verifies the requirements'
task :requirements do
  verify_requirements
end

# Default build hook executed when the Gem is installed. The pre-install
# pip step is currently disabled; only the requirements check runs.
task :default => :requirements do
  # path = File.join(PYTHON_SITE_PACKAGES, 'pre_install')
  #
  # pip_install(PRE_INSTALL_REQUIREMENTS, path)
end
|
data/ext/hack/support.rb
ADDED
@@ -0,0 +1,38 @@
|
|
1
|
+
require 'opener/build-tools'

include Opener::BuildTools::Requirements
include Opener::BuildTools::Python
include Opener::BuildTools::Files

# Directory where packages will be installed to.
PYTHON_SITE_PACKAGES = File.expand_path(
  '../../../core/site-packages',
  __FILE__
)

# Directory containing the temporary files.
TMP_DIRECTORY = File.expand_path('../../../tmp', __FILE__)

# Path to the pip requirements file used to install requirements before
# packaging the Gem.
PRE_BUILD_REQUIREMENTS = File.expand_path(
  '../../../pre_build_requirements.txt',
  __FILE__
)

# Path to the pip requirements file used to install requirements upon Gem
# installation.
PRE_INSTALL_REQUIREMENTS = File.expand_path(
  '../../../pre_install_requirements.txt',
  __FILE__
)

##
# Verifies the requirements to install this Gem: python >= 2.6.0 and
# pip >= 1.3.1 must both be available on the PATH.
#
def verify_requirements
  require_executable('python')
  require_version('python', python_version, '2.6.0')
  require_executable('pip')
  require_version('pip', pip_version, '1.3.1')
end
|
@@ -0,0 +1,69 @@
|
|
1
|
+
require 'open3'
|
2
|
+
require 'optparse'
|
3
|
+
|
4
|
+
require_relative 'tree_tagger/version'
|
5
|
+
require_relative 'tree_tagger/cli'
|
6
|
+
|
7
|
+
module Opener
  ##
  # Ruby wrapper around the Python TreeTagger kernel. The kernel is run as
  # a child process: the KAF document goes in on STDIN and the annotated
  # KAF comes back on STDOUT.
  #
  class TreeTagger
    attr_reader :options, :args

    ##
    # Hash containing the default options to use.
    #
    # @return [Hash]
    #
    DEFAULT_OPTIONS = {
      :args => []
    }.freeze

    ##
    # @param [Hash] options
    #
    # @option options [Array] :args Collection of arbitrary arguments to pass
    #  to the underlying kernel.
    #
    def initialize(options = {})
      @args    = options.delete(:args) || []
      @options = DEFAULT_OPTIONS.merge(options)
    end

    ##
    # Tags the given KAF document and returns the kernel's output.
    #
    # Aborts the process (printing the kernel's STDERR) when the kernel
    # exits with a non-zero status; otherwise any STDERR output is
    # forwarded as a warning.
    #
    # @param [String] input
    # @return [Array] The STDOUT output, STDERR output and process status.
    #
    def run(input)
      output, error, status = capture(input)

      abort(error) unless status.success?

      STDERR.puts(error) unless error.empty?

      return output, error, status
    end

    ##
    # Spawns the kernel process and captures its output streams.
    #
    def capture(input)
      Open3.capture3(*command.split(" "), :stdin_data => input)
    end

    ##
    # @return [String] The full shell-style command used to run the kernel.
    #
    def command
      return "#{adjust_python_path} python -E -OO #{kernel} #{args.join(' ')}"
    end

    protected

    ##
    # Builds an `env` prefix that puts the bundled site-packages directory
    # on the Python load path.
    #
    # @return [String]
    #
    def adjust_python_path
      packages = File.join(core_dir, 'site-packages')

      "env PYTHONPATH=#{packages}:$PYTHONPATH"
    end

    ##
    # @return [String] Path to the directory containing the Python kernel.
    #
    def core_dir
      return File.expand_path("../../core", File.dirname(__FILE__))
    end

    ##
    # @return [String] Path to the kernel script itself.
    #
    def kernel
      return File.join(core_dir, '/tt_from_kaf_to_kaf.py')
    end
  end
end
|