shalmaneser-lib 1.2.rc5
- checksums.yaml +7 -0
- data/.yardopts +10 -0
- data/CHANGELOG.md +4 -0
- data/LICENSE.md +4 -0
- data/README.md +122 -0
- data/lib/configuration/config_data.rb +457 -0
- data/lib/configuration/config_format_element.rb +210 -0
- data/lib/configuration/configuration_error.rb +15 -0
- data/lib/configuration/external_config_data.rb +56 -0
- data/lib/configuration/frappe_config_data.rb +134 -0
- data/lib/configuration/fred_config_data.rb +199 -0
- data/lib/configuration/rosy_config_data.rb +126 -0
- data/lib/db/db_interface.rb +50 -0
- data/lib/db/db_mysql.rb +141 -0
- data/lib/db/db_sqlite.rb +280 -0
- data/lib/db/db_table.rb +237 -0
- data/lib/db/db_view.rb +416 -0
- data/lib/db/db_wrapper.rb +175 -0
- data/lib/db/select_table_and_columns.rb +10 -0
- data/lib/db/sql_query.rb +243 -0
- data/lib/definitions.rb +19 -0
- data/lib/eval.rb +482 -0
- data/lib/ext/maxent/Classify.class +0 -0
- data/lib/ext/maxent/Train.class +0 -0
- data/lib/external_systems.rb +251 -0
- data/lib/framenet_format/fn_corpus_aset.rb +209 -0
- data/lib/framenet_format/fn_corpus_xml_file.rb +120 -0
- data/lib/framenet_format/fn_corpus_xml_sentence.rb +299 -0
- data/lib/framenet_format/fn_database.rb +143 -0
- data/lib/framenet_format/frame_xml_file.rb +104 -0
- data/lib/framenet_format/frame_xml_sentence.rb +411 -0
- data/lib/logging.rb +25 -0
- data/lib/ml/classifier.rb +189 -0
- data/lib/ml/mallet.rb +236 -0
- data/lib/ml/maxent.rb +229 -0
- data/lib/ml/optimize.rb +195 -0
- data/lib/ml/timbl.rb +140 -0
- data/lib/monkey_patching/array.rb +82 -0
- data/lib/monkey_patching/enumerable_bool.rb +24 -0
- data/lib/monkey_patching/enumerable_distribute.rb +18 -0
- data/lib/monkey_patching/file.rb +131 -0
- data/lib/monkey_patching/subsumed.rb +24 -0
- data/lib/ruby_class_extensions.rb +4 -0
- data/lib/salsa_tiger_xml/corpus.rb +24 -0
- data/lib/salsa_tiger_xml/fe_node.rb +98 -0
- data/lib/salsa_tiger_xml/file_parts_parser.rb +214 -0
- data/lib/salsa_tiger_xml/frame_node.rb +145 -0
- data/lib/salsa_tiger_xml/graph_node.rb +347 -0
- data/lib/salsa_tiger_xml/reg_xml.rb +285 -0
- data/lib/salsa_tiger_xml/salsa_tiger_sentence.rb +596 -0
- data/lib/salsa_tiger_xml/salsa_tiger_sentence_graph.rb +333 -0
- data/lib/salsa_tiger_xml/salsa_tiger_sentence_sem.rb +438 -0
- data/lib/salsa_tiger_xml/salsa_tiger_xml_helper.rb +84 -0
- data/lib/salsa_tiger_xml/salsa_tiger_xml_node.rb +161 -0
- data/lib/salsa_tiger_xml/sem_node.rb +58 -0
- data/lib/salsa_tiger_xml/string_terminals_in_right_order.rb +192 -0
- data/lib/salsa_tiger_xml/syn_node.rb +169 -0
- data/lib/salsa_tiger_xml/tree_node.rb +59 -0
- data/lib/salsa_tiger_xml/ts_syn_node.rb +47 -0
- data/lib/salsa_tiger_xml/usp_node.rb +72 -0
- data/lib/salsa_tiger_xml/xml_node.rb +163 -0
- data/lib/shalmaneser/lib.rb +1 -0
- data/lib/tabular_format/fn_tab_format_file.rb +38 -0
- data/lib/tabular_format/fn_tab_frame.rb +67 -0
- data/lib/tabular_format/fn_tab_sentence.rb +169 -0
- data/lib/tabular_format/tab_format_file.rb +91 -0
- data/lib/tabular_format/tab_format_named_args.rb +184 -0
- data/lib/tabular_format/tab_format_sentence.rb +119 -0
- data/lib/value_restriction.rb +49 -0
- metadata +131 -0
data/lib/ml/maxent.rb
ADDED
@@ -0,0 +1,229 @@
# wrapper script for the OpenNLP Maxent classifier
#
# sp July 2007

require "tempfile"
require 'fileutils'

class Maxent
  ###
  def initialize(program_path, parameters)
    # @note AB: <parameters> is an Array with the last part of the
    #   line from the experiment file; it should contain the path to our
    #   java wrappers, but we don't want it.
    #   Since the presence of this part is checked only here we
    #   suppose it obsolete and set this path manually here.
    # if parameters.empty?
    #   puts "Error: The OpenNLP maxent system needs two paths (first the location of maxent itself and then the location of the interface, usually program/tools/maxent)."
    #   puts "I got only the program path."
    #   Kernel.exit
    # end
    # @interface_path = parameters.first

    # @note AB: Setting path manually.
    #   It assumes <Maxent.rb> is in <lib/common> and
    #   <Classify.class> is in <lib/ext/maxent>.
    # @todo AB: This assumption should be changed. ENV[]???
    @interface_path = File.expand_path('../ext/maxent', File.dirname(__FILE__))

    @maxentpath = program_path

    unless @maxentpath =~ /\/$/
      @maxentpath = @maxentpath + "/"
    end

    # classpath for maxent
    @cp = "#{@maxentpath}:#{@maxentpath}lib:#{@maxentpath}lib/trove.jar:#{@maxentpath}output/maxent-2.4.0.jar:#{ENV["CLASSPATH"]}"
  end

  ###
  # train the classifier and write it to the training directory
  def train(infilename, classifier_file)
    trainfile = Tempfile.new(File.basename(infilename) + ".maxenttrain")
    infile = File.new(infilename)
    c45_to_maxent(infile, trainfile) # training data in C4.5 (csv) format
    infile.close
    trainfile.close

    if classifier_file
      @classifier_location = classifier_file
    else
      @classifier_location = trainfile.path + "Model.bin.gz"
    end

    @classifier_location = enforce_compact_storage(@classifier_location)

    # store model in binary, gzipped form...
    command = ["cd #{@interface_path}; ",
               # "/usr/lib/jvm/java-1.7.0/bin/java -cp #{@cp} -Xmx1000m Train",
               "java -cp #{@cp} -Xmx1000m Train",
               trainfile.path,
               @classifier_location].join(" ")
    # remember location
    unless successfully_run(command)
      return false
    end
    trainfile.close(true)
  end

  def write(classifier_file)
    classifier_file = enforce_compact_storage(classifier_file)

    if @classifier_location
      @classifier_location = enforce_compact_storage(@classifier_location)
      %x{cp #{@classifier_location} #{classifier_file}} # store classifier
      # File.chmod(0664, classifier_file + ".classifier")
    else
      $stderr.puts "Maxent error: cannot read Maxent classifier file #{classifier_file}."
      return nil
    end
  end

  ###
  def exists?(classifier_file)
    classifier_file = enforce_compact_storage(classifier_file)
    return FileTest.exists?(classifier_file)
  end

  ###
  # return true iff reading the classifier has had success
  def read(classifier_file)
    classifier_file = enforce_compact_storage(classifier_file)

    if exists?(classifier_file)
      @classifier_location = classifier_file
      return true
    else
      $stderr.puts "No classifier file " + classifier_file
      return false
    end
  end

  ###
  def apply(infilename, outfilename)
    @classifier_location = enforce_compact_storage(@classifier_location)
    unless @classifier_location
      return false
    end

    testfile = Tempfile.new(File.basename(infilename) + ".maxenttrain")

    infile = File.new(infilename)
    c45_to_maxent(infile, testfile) # test data in C4.5 (csv) format
    infile.close
    testfile.close

    command = ["cd #{@interface_path}; ",
               # "/usr/lib/jvm/java-1.7.0/bin/java -cp #{@cp} -Xmx1000m Classify ",
               "java -cp #{@cp} -Xmx1000m Classify ",
               testfile.path,
               @classifier_location,
               ">",
               outfilename].join(" ")

    # classify
    unless successfully_run(command)
      return false
    end

    # some error in classification
    unless FileTest.exists?(outfilename)
      return false
    end

    # no errors = success
    testfile.close(true)
    return true
  end

  #####
  # format of the Maxent result file:
  # <best label>[<confidence>] <secondbest_label>[<confidence>] ....
  #
  # returns a list of instance_results
  # where an instance_result is a list of pairs [label, confidence]
  # and the pairs are sorted by confidence
  def read_resultfile(filename)
    begin
      f = File.new(filename)
    rescue
      $stderr.puts "Maxent error: cannot read Maxent result file #{filename}."
      return nil
    end

    retv = []

    f.each do |line|
      line_results = []
      pieces = line.split # split at whitespace

      pieces.each { |piece|
        piece =~ /(\S+)\[(.+)\]/
        label = $1
        confidence = $2.to_f

        line_results << [label, confidence]
      }

      # sort: most confident label first
      retv << line_results.sort { |a, b| b[1] <=> a[1] }
    end

    f.close

    retv
  end

  ###################################
  private

  ###
  # produce the input file for the maxent learner: make attribute-value pairs
  # where attribute == featureX=
  def c45_to_maxent(inpipe, outpipe)
    while (line = inpipe.gets)
      line.chomp!
      la = line.split(",")
      label = la.pop
      if label[-1, 1] == "."
        label.chop!
      end
      la.each_index { |i|
        la[i] = i.to_s + "=" + la[i]
      }
      la.push(label)
      outpipe.puts la.join(" ")
    end
  end

  # since the OpenNLP MaxEnt system determines storage based on the filename,
  # make sure that all models are stored internally as binary, gzipped files.
  def enforce_compact_storage(filename)
    if filename =~ /Model.bin.gz/
      return filename
    else
      return filename + "Model.bin.gz"
    end
  end

  ###
  def successfully_run(command)
    retv = Kernel.system(command)
    unless retv
      $stderr.puts "Error running classifier. Continuing."
      $stderr.puts "Offending command: " + command
      # exit 1
    end
    return retv
  end
end
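In Shalmaneser the Maxent wrapper is normally driven through the experiment-file machinery, but its train/apply cycle can also be exercised directly. A rough usage sketch follows; the install path /opt/maxent-2.4.0, the file names train.c45, test.c45 and frame.model, and the require_relative line are illustrative assumptions, not part of the gem:

require_relative 'maxent'   # assumed load path for the Maxent class above

# Input files are C4.5-style: comma-separated feature values, label last.
# c45_to_maxent rewrites each line as "0=val0 1=val1 ... label" before
# handing it to the OpenNLP Train/Classify wrappers.
maxent = Maxent.new('/opt/maxent-2.4.0', [])       # assumed OpenNLP Maxent install path

maxent.train('train.c45', 'frame.model')           # model lands in frame.modelModel.bin.gz
if maxent.read('frame.model') && maxent.apply('test.c45', 'test.maxent.out')
  maxent.read_resultfile('test.maxent.out').each do |instance|
    best_label, confidence = instance.first        # pairs come sorted, best label first
    puts "#{best_label} (#{confidence})"
  end
end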
data/lib/ml/optimize.rb
ADDED
@@ -0,0 +1,195 @@
# sp 29 07 04
# "optimise" c4.5 files by replacing all feature values which only
# occur with one label by a new, common value.
#
# two modes of operation:
# optimise <file>             -- optimise file and store optimisations in <file>.opts
# optimise <file> <file.opts> -- apply optimisation from file.opts to file

class Optimise

  def initialize
    @ready = false
  end

  def init_from_data(infile) # find a new optimisation
    STDERR.puts "[Optimise] computing new feature optimisation"

    infile = File.new(infile)
    labels = []
    features = nil
    @replacements = [] # for each feature, store the list of replacements

    # read data from infile into arrays and initialise the replacements array
    while (line = infile.gets)
      f_l = line.chomp.split(",")

      if features.nil? # first line: initialisation
        features = [] # for each feature: array of feature values from the file
        f_l.each_index { |i|
          features[i] = []
          @replacements[i] = {}
        }
      end
      labels << f_l.pop
      f_l.each_index { |i|
        features[i] << f_l[i]
      }
    end
    infile.close

    features.each_index { |findex| # traverse all features

      # for each feature *value*, find all label indices
      fvalues = features[findex]

      fval_to_label = {} # record fval -> label mappings:
      # no label:   nil
      # one label:  <label>
      # two labels: false

      fvalues.each_index { |inst_idx|
        label = labels[inst_idx]         # current label
        fval = fvalues[inst_idx]         # current feature value
        seen_label = fval_to_label[fval] # previously seen label
        if seen_label.nil?
          fval_to_label[fval] = label
        elsif seen_label and seen_label != label
          fval_to_label[fval] = false
        end
      } # at the end, all fvals should be mapped to either <label> or false

      # construct new feature value names
      new_fvals = {}
      labels.each { |label|
        new_fvals[label] = "f" + findex.to_s + "_" + label.gsub(/\./, "")
      }

      # record in @replacements all feature values for which we have seen only one label
      fval_to_label.each_pair { |fval, label|
        if fval == "[U]"
          puts "[U]: " + label.to_s + " " + new_fvals[label]
        end
        if label
          # STDERR.puts "replacement of " + fval + " by " + new_fvals[label]
          @replacements[findex][fval] = new_fvals[label]
        end
      }

      # fvalues = features[findex]

      # l_to_v = {} # label -> array of feature values
      # v_to_l = {} # feature value -> array of labels

      # fvalues.each_index { |inst| # traverse all instances
      #   fval = fvalues[inst]
      #   label = labels[inst]
      #
      #   unless v_to_l.key?(fval) # add entry to v_to_l
      #     v_to_l[fval] = []
      #   end
      #   v_to_l[fval] << label
      #
      #   unless l_to_v.key?(label) # add entry to l_to_v
      #     l_to_v[label] = []
      #   end
      #   l_to_v[label] << fval
      # }

      # l_to_v.each_pair { |label, values|
      #   newvalue = "f" + findex.to_s + "_" + label.gsub(/\./, "")
      #   values.each { |value|
      #     if v_to_l[value].uniq.length == 1
      #       @replacements[findex][value] = newvalue
      #     end
      #   }
      # }
    }
    @ready = true
  end

  def init_from_file(optsfile) # use a previously stored optimisation
    optsinfile = File.new(optsfile)
    @replacements = read(optsinfile)
    optsinfile.close
    @ready = true
  end

  def store(outfilename) # store the data necessary to recreate this optimisation
    unless @ready
      raise "[Optimise] Error: Cannot store un-initialised optimisation"
    end
    outfile = File.new(outfilename, "w")
    @replacements.each_index { |i| # for each feature
      reps = @replacements[i]
      outfile.puts "<" + i.to_s + ">"
      reps.each_pair { |old, new|
        outfile.puts [old, new].join("\t")
      }
      outfile.puts "</" + i.to_s + ">"
    }
    outfile.close
  end

  def apply(infilename, outfilename)
    unless @ready
      raise "[Optimise] Error: Cannot apply un-initialised optimisation"
    end

    STDERR.puts "[Optimise] applying feature optimisation"

    infile = File.new(infilename)
    outfile = File.new(outfilename, "w")
    features = []
    labels = []

    while (line = infile.gets)
      tokens = line.chomp.split(",")

      unless tokens.length == @replacements.length
        raise "[Optimise] Error: trying to optimise an incompatible feature file!\nFile has " + tokens.length.to_s + " columns, and we know replacements for " + @replacements.length.to_s + " columns."
      end

      label = tokens.pop
      tokens.each_index { |f_idx|
        fval = tokens[f_idx]
        if @replacements[f_idx].key?(fval)
          tokens[f_idx] = @replacements[f_idx][fval]
        end
      }
      tokens.push label
      outfile.puts tokens.join(",")
    end
    outfile.close
  end

  private

  def read(infile)
    @replacements = []
    while line = infile.gets
      line.chomp!
      if line =~ /<(\d+)>/
        reps = {}
      elsif line =~ /<\/(\d+)>/
        @replacements[$1.to_i] = reps
      else
        tokens = line.chomp.split("\t")
        reps[tokens[0]] = tokens[1]
      end
    end
    infile.close
    @replacements
  end

  # return the recommended filename for storing optimisation patterns for basefile
  def Optimise.recommended_filename(basefile)
    return basefile + ".optimisations"
  end

end
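The two modes named in the header comment correspond to init_from_data and init_from_file. A rough sketch of both, assuming comma-separated C4.5 feature files named train.c45 and test.c45 (the file names and the require_relative line are illustrative):

require_relative 'optimize'   # assumed load path for the Optimise class above

# Mode 1: learn replacements from training data, store and apply them.
opt = Optimise.new
opt.init_from_data('train.c45')
opt.store(Optimise.recommended_filename('train.c45'))   # -> train.c45.optimisations
opt.apply('train.c45', 'train.opt.c45')

# Mode 2: reuse the stored optimisation on further files.
opt2 = Optimise.new
opt2.init_from_file(Optimise.recommended_filename('train.c45'))
opt2.apply('test.c45', 'test.opt.c45')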
data/lib/ml/timbl.rb
ADDED
@@ -0,0 +1,140 @@
# wrapper script for the TiMBL learner
# sp 24 08 04

# follows the common contract for Learner classes

class Timbl
  def initialize(program_path, parameters)
    @timblpath = File.join(program_path, "Timbl")
    unless @timblpath =~ /\s$/
      # path must end in a space so we can just attach parameters
      @timblpath << " "
    end

    if parameters.empty?
      # was: +vs
      @params = "-mM -k5 +vs" # default parameters
    else
      @params = parameters.join(" ") + " "
    end
  end

  # timbl output: [all features], [gold standard label]
  def timbl_out_to_malouf_out(infilename, outfilename)
    infile = File.new(infilename)
    outfile = File.new(outfilename, "w")
    while (line = infile.gets)
      larray = line.chomp.split(",")
      ml_label = larray.last
      outfile.puts ml_label + "\t1"
    end
    infile.close
    outfile.close
  end

  # lazy learning: for training, store the instance base
  # as a tree (TiMBL -I / -i option)
  def train(infile, classifier_location)
    # figure out how many features we have
    f = File.new(infile)
    line = f.gets.chomp
    num_features = line.split(",").length - 1
    f.close

    # and train
    if classifier_location
      @instancebase = classifier_location
    else
      @instancebase = infile + ".instancebase"
    end
    successfully_run(@timblpath + @params + " -N#{num_features} -f " + infile + " -I " + @instancebase)
  end

  # return true iff reading the classifier has had success
  def read(classifierfile)
    unless FileTest.exists?(classifierfile)
      STDERR.puts "[Timbl] Cannot find instancebase at #{classifierfile}"
      return false
    end
    @instancebase = classifierfile
    return true
  end

  def exists?(classifierfile)
    return FileTest.exists?(classifierfile)
  end

  def write(classifierfile)
    %x{cp #{@instancebase} #{classifierfile}} # store training data as the "modelfile"
    File.chmod(0664, classifierfile)
  end

  def apply(infile, outfile)
    temp_outfile = outfile + ".temp"
    successfully_run(@timblpath + @params + " -i " + @instancebase + " -t " + infile + " -o " + temp_outfile)

    # if we have an empty input file, timbl will not produce an output file
    unless FileTest.exists?(temp_outfile)
      # STDERR.puts "[Timbl] Warning: Timbl failed to produce an outfile."
      return false
    end

    # no error
    timbl_out_to_malouf_out(temp_outfile, outfile)
    File.unlink(temp_outfile)

    # true iff outfile exists
    if FileTest.exists?(outfile)
      return true
    else
      # STDERR.puts "[Timbl] Warning: Final outfile could not be produced."
      return false
    end
  end

  #####
  def read_resultfile(filename)
    begin
      f = File.new(filename)
    rescue
      $stderr.puts "TiMBL error: cannot read TiMBL result file #{filename}."
      return nil
    end

    retv = []

    f.each { |line|
      line_results = []
      pieces = line.split

      while not(pieces.empty?)
        label = pieces.shift

        begin
          confidence = pieces.shift.to_f
        rescue
          $stderr.puts "Error reading TiMBL output: invalid line: #{line}"
          confidence = 0
        end

        line_results << [label, confidence]
      end
      retv << line_results
    }
    f.close

    return retv
  end

  #########################
  private

  ###
  def successfully_run(command)
    retv = Kernel.system(command)
    unless retv
      $stderr.puts "Error running classifier. Exiting."
      $stderr.puts "Offending command: " + command
      exit 1
    end
  end

end
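Like the Maxent and Mallet wrappers above, Timbl exposes the learner contract hinted at in its header: train, write, exists?, read, apply and read_resultfile. A rough sketch, assuming the TiMBL binary is installed under /usr/local/bin and the file names are placeholders:

require_relative 'timbl'   # assumed load path for the Timbl class above

timbl = Timbl.new('/usr/local/bin', [])            # empty parameters -> default "-mM -k5 +vs"

timbl.train('train.c45', 'frames.instancebase')    # builds and stores the instance base
if timbl.read('frames.instancebase') && timbl.apply('test.c45', 'test.timbl.out')
  timbl.read_resultfile('test.timbl.out').each do |instance|
    label, confidence = instance.first             # "malouf" format: one label per line, confidence 1
    puts "#{label} (#{confidence})"
  end
end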