shalmaneser 1.2.0.rc4 → 1.2.rc5
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +47 -18
- data/bin/shalmaneser +8 -2
- data/doc/index.md +1 -0
- data/lib/shalmaneser/opt_parser.rb +68 -67
- metadata +49 -119
- data/bin/fred +0 -16
- data/bin/frprep +0 -34
- data/bin/rosy +0 -17
- data/lib/common/AbstractSynInterface.rb +0 -1229
- data/lib/common/Counter.rb +0 -18
- data/lib/common/EnduserMode.rb +0 -27
- data/lib/common/Eval.rb +0 -480
- data/lib/common/FixSynSemMapping.rb +0 -196
- data/lib/common/Graph.rb +0 -345
- data/lib/common/ISO-8859-1.rb +0 -24
- data/lib/common/ML.rb +0 -186
- data/lib/common/Mallet.rb +0 -236
- data/lib/common/Maxent.rb +0 -229
- data/lib/common/Optimise.rb +0 -195
- data/lib/common/Parser.rb +0 -213
- data/lib/common/RegXML.rb +0 -269
- data/lib/common/RosyConventions.rb +0 -171
- data/lib/common/STXmlTerminalOrder.rb +0 -194
- data/lib/common/SalsaTigerRegXML.rb +0 -2347
- data/lib/common/SalsaTigerXMLHelper.rb +0 -99
- data/lib/common/SynInterfaces.rb +0 -282
- data/lib/common/TabFormat.rb +0 -721
- data/lib/common/Tiger.rb +0 -1448
- data/lib/common/Timbl.rb +0 -144
- data/lib/common/Tree.rb +0 -61
- data/lib/common/config_data.rb +0 -470
- data/lib/common/config_format_element.rb +0 -220
- data/lib/common/headz.rb +0 -338
- data/lib/common/option_parser.rb +0 -13
- data/lib/common/prep_config_data.rb +0 -62
- data/lib/common/prep_helper.rb +0 -1330
- data/lib/common/ruby_class_extensions.rb +0 -310
- data/lib/db/db_interface.rb +0 -48
- data/lib/db/db_mysql.rb +0 -145
- data/lib/db/db_sqlite.rb +0 -280
- data/lib/db/db_table.rb +0 -239
- data/lib/db/db_wrapper.rb +0 -176
- data/lib/db/sql_query.rb +0 -243
- data/lib/ext/maxent/Classify.class +0 -0
- data/lib/ext/maxent/Train.class +0 -0
- data/lib/fred/Baseline.rb +0 -150
- data/lib/fred/FileZipped.rb +0 -31
- data/lib/fred/FredBOWContext.rb +0 -877
- data/lib/fred/FredConventions.rb +0 -232
- data/lib/fred/FredDetermineTargets.rb +0 -319
- data/lib/fred/FredEval.rb +0 -312
- data/lib/fred/FredFeatureExtractors.rb +0 -322
- data/lib/fred/FredFeatures.rb +0 -1061
- data/lib/fred/FredFeaturize.rb +0 -602
- data/lib/fred/FredNumTrainingSenses.rb +0 -27
- data/lib/fred/FredParameters.rb +0 -402
- data/lib/fred/FredSplit.rb +0 -84
- data/lib/fred/FredSplitPkg.rb +0 -180
- data/lib/fred/FredTest.rb +0 -606
- data/lib/fred/FredTrain.rb +0 -144
- data/lib/fred/PlotAndREval.rb +0 -480
- data/lib/fred/fred.rb +0 -47
- data/lib/fred/fred_config_data.rb +0 -185
- data/lib/fred/md5.rb +0 -23
- data/lib/fred/opt_parser.rb +0 -250
- data/lib/frprep/Ampersand.rb +0 -39
- data/lib/frprep/CollinsInterface.rb +0 -1165
- data/lib/frprep/Counter.rb +0 -18
- data/lib/frprep/FNCorpusXML.rb +0 -643
- data/lib/frprep/FNDatabase.rb +0 -144
- data/lib/frprep/FrameXML.rb +0 -513
- data/lib/frprep/Graph.rb +0 -345
- data/lib/frprep/MiniparInterface.rb +0 -1388
- data/lib/frprep/RegXML.rb +0 -269
- data/lib/frprep/STXmlTerminalOrder.rb +0 -194
- data/lib/frprep/SleepyInterface.rb +0 -384
- data/lib/frprep/TntInterface.rb +0 -44
- data/lib/frprep/TreetaggerInterface.rb +0 -327
- data/lib/frprep/do_parses.rb +0 -143
- data/lib/frprep/frprep.rb +0 -693
- data/lib/frprep/interfaces/berkeley_interface.rb +0 -372
- data/lib/frprep/interfaces/stanford_interface.rb +0 -353
- data/lib/frprep/interpreters/berkeley_interpreter.rb +0 -22
- data/lib/frprep/interpreters/stanford_interpreter.rb +0 -22
- data/lib/frprep/one_parsed_file.rb +0 -28
- data/lib/frprep/opt_parser.rb +0 -94
- data/lib/frprep/ruby_class_extensions.rb +0 -310
- data/lib/rosy/AbstractFeatureAndExternal.rb +0 -242
- data/lib/rosy/ExternalConfigData.rb +0 -58
- data/lib/rosy/FailedParses.rb +0 -130
- data/lib/rosy/FeatureInfo.rb +0 -242
- data/lib/rosy/GfInduce.rb +0 -1115
- data/lib/rosy/GfInduceFeature.rb +0 -148
- data/lib/rosy/InputData.rb +0 -294
- data/lib/rosy/RosyConfusability.rb +0 -338
- data/lib/rosy/RosyEval.rb +0 -465
- data/lib/rosy/RosyFeatureExtractors.rb +0 -1609
- data/lib/rosy/RosyFeaturize.rb +0 -281
- data/lib/rosy/RosyInspect.rb +0 -336
- data/lib/rosy/RosyIterator.rb +0 -478
- data/lib/rosy/RosyPhase2FeatureExtractors.rb +0 -230
- data/lib/rosy/RosyPruning.rb +0 -165
- data/lib/rosy/RosyServices.rb +0 -744
- data/lib/rosy/RosySplit.rb +0 -232
- data/lib/rosy/RosyTask.rb +0 -19
- data/lib/rosy/RosyTest.rb +0 -829
- data/lib/rosy/RosyTrain.rb +0 -234
- data/lib/rosy/RosyTrainingTestTable.rb +0 -787
- data/lib/rosy/TargetsMostFrequentFrame.rb +0 -60
- data/lib/rosy/View.rb +0 -418
- data/lib/rosy/opt_parser.rb +0 -379
- data/lib/rosy/rosy.rb +0 -78
- data/lib/rosy/rosy_config_data.rb +0 -121
- data/lib/shalmaneser/version.rb +0 -3
data/lib/rosy/opt_parser.rb
DELETED
@@ -1,379 +0,0 @@
|
|
1
|
-
# -*- coding: utf-8 -*-
|
2
|
-
|
3
|
-
require 'getoptlong'
|
4
|
-
|
5
|
-
require 'rosy/rosy_config_data'
|
6
|
-
|
7
|
-
module Rosy
|
8
|
-
|
9
|
-
class OptParser
  # Parses the Rosy command line and loads the experiment file.
  #
  # NOTE: GetoptLong always reads the process-wide ARGV, so +cmd_args+ is
  # accepted for interface compatibility but is not consulted.
  #
  # cmd_args:: command line arguments (unused, see note above)
  #
  # Returns the pair [exp, opts]: the RosyConfigData object built from the
  # file given with --expfile, and a Hash mapping each option name that was
  # given (long form, e.g. '--task') to its argument.
  #
  # Terminates the process (exit status 1) on an invalid option, an unknown
  # task, an option that does not belong to the chosen task, a missing
  # --expfile, or an ill-formed experiment ID. Prints the help text and
  # exits with status 0 when --help is given or --task is missing.
  #
  # Side effect: sets the global $ENDUSER_MODE from the experiment file.
  def self.parse(cmd_args)
    tasks = task_options

    # Options common to all tasks, followed by every task-specific option.
    optnames = [
      ['--help', '-h', GetoptLong::NO_ARGUMENT],          # get help
      ['--expfile', '-e', GetoptLong::REQUIRED_ARGUMENT], # experiment file name (and path), no default
      ['--task', '-t', GetoptLong::REQUIRED_ARGUMENT]     # task to perform: one of tasks.keys, no default
    ]
    tasks.each_value { |task_optnames| optnames.concat(task_optnames) }
    # Several tasks share options (e.g. --logID); GetoptLong wants each once.
    optnames.uniq!

    begin
      # asterisk: "explode" array into individual parameters
      opts = options_hash(GetoptLong.new(*optnames))
    rescue => e
      # Interpolate the message: concatenating the exception object itself
      # to a String raises TypeError and hides the real problem.
      $stderr.puts "Error: unknown command line option: #{e.message}"
      exit 1
    end

    # are we being asked for help?
    if opts['--help']
      help
      exit(0)
    end

    # now find the task
    task = opts['--task']
    if task.nil?
      help
      exit(0)
    end
    unless tasks.key?(task)
      $stderr.puts "Sorry, I don't know the task '#{task}'. Do 'ruby rosy.rb -h' for a list of tasks."
      exit 1
    end

    # Every remaining option must belong to the chosen task.
    opts.each_key do |opt|
      next if ['--help', '--task', '--expfile'].include?(opt)
      next if tasks[task].assoc(opt)

      $stderr.puts "Sorry, I don't know the option #{opt} for task #{task}"
      $stderr.puts "Do 'ruby rosy.rb -h' for a list of tasks and options."
      exit 1
    end

    experiment_filename = opts['--expfile']
    if experiment_filename.nil?
      $stderr.puts "I need an experiment file name, option --expfile|-e"
      exit 1
    end

    # open config file
    exp = RosyConfigData.new(experiment_filename)

    # Sanity check. \A and \z anchor the whole string; ^ and $ would accept
    # any multi-line value that merely contains one well-formed line.
    unless exp.get("experiment_ID") =~ /\A[A-Za-z0-9_]+\z/
      $stderr.puts "Please choose an experiment ID consisting only of the letters A-Za-z0-9_."
      exit 1
    end

    # enduser mode?
    $ENDUSER_MODE = exp.get("enduser_mode")

    [exp, opts]
  end

  # Returns a Hash mapping each task name to the list of GetoptLong option
  # specifications that are valid for that task.
  def self.task_options
    {
      "featurize" => [
        ['--testID', '-i', GetoptLong::REQUIRED_ARGUMENT],  # test table ID, required for test, no default
        ['--dataset', '-d', GetoptLong::REQUIRED_ARGUMENT], # set to featurize: 'train' or 'test', no default
        ['--logID', '-l', GetoptLong::REQUIRED_ARGUMENT],   # splitlog ID: if given, featurize this split. Cannot use both this and -d
        ['--append', '-A', GetoptLong::NO_ARGUMENT]
      ],
      "split" => [
        ['--logID', '-l', GetoptLong::REQUIRED_ARGUMENT],       # splitlog ID, required, no default
        ['--trainpercent', '-r', GetoptLong::REQUIRED_ARGUMENT] # percentage training data, default: 90
      ],
      "train" => [
        ['--logID', '-l', GetoptLong::REQUIRED_ARGUMENT], # splitlog ID; if given, will train on split rather than all of main table
        ['--step', '-s', GetoptLong::REQUIRED_ARGUMENT]   # classification step: 'argrec', 'arglab', 'both' (default) or 'onestep'
      ],
      "test" => [
        ['--step', '-s', GetoptLong::REQUIRED_ARGUMENT],   # classification step: 'argrec', 'arglab', 'both' (default) or 'onestep'
        ['--testID', '-i', GetoptLong::REQUIRED_ARGUMENT], # test table ID: if given, test on this table
        ['--logID', '-l', GetoptLong::REQUIRED_ARGUMENT],  # splitlog ID: if given, test on this split. Cannot use both this and -i
        ['--nooutput', '-N', GetoptLong::NO_ARGUMENT]      # set this to prevent output of disambiguated test data
      ],
      "eval" => [
        ['--step', '-s', GetoptLong::REQUIRED_ARGUMENT],   # classification step: 'argrec', 'arglab', 'both' (default) or 'onestep'
        ['--testID', '-i', GetoptLong::REQUIRED_ARGUMENT], # test table ID: if given, test on this table
        ['--logID', '-l', GetoptLong::REQUIRED_ARGUMENT]
      ],
      "inspect" => [
        ['--tables', GetoptLong::NO_ARGUMENT],             # describe all tables
        ['--tablecont', GetoptLong::OPTIONAL_ARGUMENT],    # describe table contents for current experiment
        ['--testID', '-i', GetoptLong::REQUIRED_ARGUMENT], # test table ID: if given, describe contents of this table
        ['--runs', GetoptLong::NO_ARGUMENT],               # describe classification runs for current experiment
        ['--split', GetoptLong::REQUIRED_ARGUMENT]         # list sentence IDs for given splitlog
      ],
      "services" => [
        ['--deltable', GetoptLong::REQUIRED_ARGUMENT],      # delete database table
        ['--delexp', GetoptLong::NO_ARGUMENT],              # delete experiment tables and files
        ['--deltables', GetoptLong::NO_ARGUMENT],           # delete tables interactively
        ['--delruns', GetoptLong::NO_ARGUMENT],             # delete runs
        ['--delsplit', GetoptLong::REQUIRED_ARGUMENT],      # delete split
        ['--dump', GetoptLong::OPTIONAL_ARGUMENT],          # dump experiment to files
        ['--load', GetoptLong::OPTIONAL_ARGUMENT],          # load experiment from files
        ['--writefeatures', GetoptLong::OPTIONAL_ARGUMENT], # write feature files
        ['--step', '-s', GetoptLong::REQUIRED_ARGUMENT],    # classification step: 'argrec', 'arglab', 'both' (default) or 'onestep'
        ['--testID', '-i', GetoptLong::REQUIRED_ARGUMENT],  # test table ID: if given, test on this table
        ['--logID', '-l', GetoptLong::REQUIRED_ARGUMENT]    # splitlog ID: if given, test on this split. Cannot use both this and -i
      ]
    }
  end

  # Prints the usage text for all tasks to standard error.
  # The text interpolates default_test_ID(), which is defined outside this
  # class and must be loaded before this method is called.
  def self.help
    $stderr.puts "
ROSY: semantic ROle assignment SYstem Version 0.2

Usage:

ruby rosy.rb --help|-h

gets you this help text.

ruby rosy.rb --task|-t featurize --expfile|-e <e>
[--dataset|-d <d>] [--testID|-i <i>]
[--logID|-l <l> ] [--append|-A]
featurizes input data and stores it in a database.
Enduser mode: dataset has to be 'test' (preset as default),
no --append.

--expfile <e> Use <e> as the experiment description and
configuration file

--dataset <d> Set to featurize: <d> is either 'train'
(put data into main table) or 'test' (put data
into separate test table with ID given using --testID)
Use at least one of --logID, --dataset.

--logID <l> Re-featurize the split with ID <l>:
Features that train on training instances are done
separately for each split.
Use at least one of --logID, --dataset.

--testID <i> Use <i> as the ID for the table to store the test data.
necessary only with '--dataset test'. default: #{default_test_ID()}.

--append Do not overwrite previously computed features
for this experiment.
Rather, append the new features
to the old featurization files.
Default: overwrite

ruby rosy.rb --task|-t split --expfile|-e <f> --logID|-l <l>
[--trainpercent|-r <r>]
produces a new train/test split on the main table of the experiment.
Not available in enduser mode.

--expfile <f> Use <f> as the experiment description and configuration file

--logID <l> Use <l> as the ID for storing this new split

--trainpercent <r> Allocate <r> percent of the data as train,
and 100-<r> as test
default: <r>=90


ruby rosy.rb --task|-t train --expfile|-e <f> [--step|-s <s>] [--logID|-l <l>]
train classifier(s) on the main table data (or a split of it)
Not available in enduser mode.

--expfile <f> Use <f> as the experiment description and configuration file

--step <s> What kind of classifier(s) to train?
<s>=argrec: argument recognition,
distinguish role from nonrole
<s>=arglab: argument labeling, naming roles,
builds on argrec
<s>=both: first argrec, then arglab
<s>=onestep: do argument labeling right away without
prior filtering of non-arguments
default: both

--logID <l> If given, train on this split of the main table rather than
the whole main table


ruby rosy.rb --task|-t test --expfile|-e <f> [--step|-s <s>]
[--logID|-l <l> | --testID|-i <i>] [--nooutput|-N]
apply classifier(s) on data from a test table, or a main table split
Enduser mode: only -s both, -s onestep available. Cleanup: Database with
featurization data is removed after the run.

--expfile <f> Use <f> as the experiment description and configuration file

--step <s> What kind of classifier(s) to use for testing?
<s>=argrec: argument recognition,
distinguish role from nonrole
<s>=arglab: argument labeling, naming roles,
builds on argrec
<s>=both: first argrec, then arglab
<s>=onestep: do argument labeling right away without
prior filtering of non-arguments
default: both
--logID <l> If given, test on this split of the main table

--testID <i> If given, test on this test table.
(Use either this option or -l)

--nooutput Do not produce an output of the disambiguated test data
in SalsaTigerXML format. This is useful if you just want
to evaluate the system.
Default: output is produced.


ruby rosy.rb --task|-t eval --expfile|-e <f> [--step|-s <s>]
[--logID|-l <l> | --testID|-i <i>]
evaluate the classification results.
Not available in enduser mode.

--expfile <f> Use <f> as the experiment description and configuration file

--step <s> Evaluate results of which classification step?
<s>=argrec: argument recognition,
distinguish role from nonrole
<s>=arglab: argument labeling, naming roles,
builds on argrec
<s>=both: first argrec, then arglab
<s>=onestep: do argument labeling right away without
prior filtering of non-arguments
default: both

--logID <l> If given, evaluate on the test data from this split of
the main table.
(use either this option or -i)

--testID <i> If given, evaluate on this test table.
(Use either this option or -l)


ruby rosy.rb --task|-t inspect --expfile|-e <f> [--tables] [--runs]
[--tablecont [N]] [--testID|-i <i>] [--split <l>]
inspect system-internal data, both global and pertaining to the current
experiment.
If no options are chosen, an overview of the current experiment
is given.

--expfile <f> Use <f> as the experiment description and
configuration file

--tables Lists all tables of the DB: table name,column names

--tablecont [N|id:N] Lists the training instances (as feature vectors)
of the current experiment.
If test ID is given, test instances are listed as well.
The optional argument may have one of two forms:
- It may be a number N. Then only the N first lines
of each set are listed.
- It may be a pair id:N. Then only the N first lines of
the DB table with ID id are listed. To list all lines
of a single DB table, use id:

--testID <i> If given, --tablecont also lists the feature vectors for
this test table

--runs List all classification runs of the current experiment

--split <l> List the split with the given ID

ruby rosy.rb --task|-t services --expfile|-e <f> [--deltable <t>]
[--delexp] [--dump [<D>]] [--load [<D>]] [--deltables] [--delruns]
[--delsplit <l>] [--writefeatures [<D>]]
[--step|-s <s>] [--testID|-i <i>] [--logID|-l <l> ]
diverse services.
The --del* services are not available in enduser mode.

--dump [<D>] Dump the database tables for the current experiment file.
If a directory <D> is given, the tables are written there,
otherwise they are written to
data_dir/<experiment_ID>/tables, where data_dir is the
data directory given in the experiment file.
No existing files in the directory are removed.

--load [<D>] Construct new database tables from the files in
the directory <D>, if it is given, otherwise from
data_dir/<experiment_id>/tables, where data_dir
is the data directory given in the experiment file.
Warning: Database tables are loaded into the
current experiment, the one described in the
experiment file. Existing data in tables with
the same names is overwritten!

--deltable <t> Remove database table <t>

--deltables Presents all tables in the database for interactive deletion

--delexp Remove the experiment described in the given experiment file,
all its database tables and files.

--delruns Presents all classification runs for the current experiment
for interactive deletion

--delsplit <l> Remove the split with ID <l> from the experiment
described in the given experiment file.

--writefeatures <D> Write feature files to directory <D>, such
that you can use them with some external machine learning
system. If <D> is not given, feature files are written
to data_dir/<experiment_id>/your_feature_files/.

Uses the parameters --step, --testID, --logID to
determine which feature files will be written.

--step <s> Use with --writefeatures: task for which to write features.
<s>=argrec: argument recognition,
distinguish role from nonrole
<s>=arglab: argument labeling, naming roles
<s>=onestep: do argument labeling right away without
prior filtering of non-arguments
default: onestep.

--logID <l> Use with --writefeatures: write features
for the split with ID <l>.

--testID <i> Use with --writefeatures: write features
for the test set with ID <i>.
default: #{default_test_ID()}.
"
  end

  ###
  # options_hash:
  #
  # GetoptLong only allows you to access options via each(),
  # not individually, and it only allows you to cycle through the options once.
  # So we re-code the options as a hash.
  #
  # opts_obj:: GetoptLong object (anything whose each() yields
  #            option/argument pairs works)
  #
  # Returns a Hash mapping option name to argument. If an option is yielded
  # more than once, the last argument wins.
  def self.options_hash(opts_obj)
    opt_hash = {}
    opts_obj.each { |opt, arg| opt_hash[opt] = arg }
    opt_hash
  end
end # class OptParser
|
378
|
-
|
379
|
-
end # module Rosy
|
data/lib/rosy/rosy.rb
DELETED
@@ -1,78 +0,0 @@
|
|
1
|
-
# AB: 2011-11-14
|
2
|
-
# Initial import done, need to reimplement the whole interface.
|
3
|
-
|
4
|
-
require 'db/db_interface'
|
5
|
-
require 'rosy/RosyFeaturize'
|
6
|
-
require 'rosy/RosyTest'
|
7
|
-
require 'rosy/RosyTrain'
|
8
|
-
require 'rosy/RosyInspect'
|
9
|
-
require 'rosy/RosyEval'
|
10
|
-
require 'rosy/RosyServices'
|
11
|
-
|
12
|
-
module Rosy
|
13
|
-
# Runs a single Rosy task (featurize, split, train, test, eval, inspect or
# services) against the feature database of one experiment.
class Rosy
  # options:: a pair [exp, opts]: the experiment configuration object and
  #           the Hash of command line options. The task name is read
  #           from opts['--task'].
  def initialize(options)
    @exp, @opts = options
    @task = @opts['--task']
  end

  # Opens the feature database of the experiment, builds the task object
  # selected by --task and runs its perform method.
  #
  # The database handle is closed on every exit path once it has been
  # opened, including failures while building the table or task object.
  #
  # Raises RuntimeError when the task name is unknown or when the task's
  # perform method fails (the original error class and message are
  # embedded in the new message, its backtrace is printed to stderr).
  def assign
    # make rosy directory pattern:
    # main rosy directory name (data_dir) plus subdirectory
    # named after the experiment ID
    rosy_dir_pattern = File.new_dir(@exp.get("data_dir")) + "<exp_ID>/"
    @exp.set_entry("rosy_dir", rosy_dir_pattern)

    # open database
    rosy_dir = File.new_dir(@exp.instantiate("rosy_dir",
                                             "exp_ID" => @exp.get("experiment_ID")))
    database = get_db_interface(@exp, rosy_dir, "features")

    begin
      table_obj = RosyTrainingTestTable.new(@exp, database)
      run_task(build_task(table_obj))
    ensure
      # Release the handle no matter where the processing above failed.
      database.close
    end

    $stderr.puts "Rosy: done."
  end

  private

  # Instantiates the task object matching @task.
  # Only the constant of the selected branch is resolved, so task classes
  # that are not needed do not have to be loaded.
  def build_task(table_obj)
    task_class =
      case @task
      when "featurize" then RosyFeaturize
      when "split"     then RosySplit
      when "train"     then RosyTrain
      when "test"      then RosyTest
      when "eval"      then RosyEvalTask
      when "inspect"   then RosyInspect
      when "services"  then RosyServices
      else raise "Shouldn't be here"
      end

    task_class.new(@exp, @opts, table_obj)
  end

  # Executes the task; on failure prints the backtrace to stderr (where
  # all other diagnostics of this class go) and re-raises a summarising
  # RuntimeError.
  def run_task(task_obj)
    task_obj.perform
  rescue => e
    $stderr.puts e.backtrace
    fail "Error during task execution: #{e.class}=>#{e.message}"
  end
end # class Rosy
|
78
|
-
end # module Rosy
|