frprep 0.0.1.prealpha
Sign up to get free protection for your applications and to get access to all the features.
- data/.yardopts +8 -0
- data/CHANGELOG.rdoc +0 -0
- data/LICENSE.rdoc +0 -0
- data/README.rdoc +0 -0
- data/lib/common/AbstractSynInterface.rb +1227 -0
- data/lib/common/BerkeleyInterface.rb +375 -0
- data/lib/common/CollinsInterface.rb +1165 -0
- data/lib/common/ConfigData.rb +694 -0
- data/lib/common/Counter.rb +18 -0
- data/lib/common/DBInterface.rb +48 -0
- data/lib/common/EnduserMode.rb +27 -0
- data/lib/common/Eval.rb +480 -0
- data/lib/common/FixSynSemMapping.rb +196 -0
- data/lib/common/FrPrepConfigData.rb +66 -0
- data/lib/common/FrprepHelper.rb +1324 -0
- data/lib/common/Graph.rb +345 -0
- data/lib/common/ISO-8859-1.rb +24 -0
- data/lib/common/ML.rb +186 -0
- data/lib/common/Maxent.rb +215 -0
- data/lib/common/MiniparInterface.rb +1388 -0
- data/lib/common/Optimise.rb +195 -0
- data/lib/common/Parser.rb +213 -0
- data/lib/common/RegXML.rb +269 -0
- data/lib/common/RosyConventions.rb +171 -0
- data/lib/common/SQLQuery.rb +243 -0
- data/lib/common/STXmlTerminalOrder.rb +194 -0
- data/lib/common/SalsaTigerRegXML.rb +2347 -0
- data/lib/common/SalsaTigerXMLHelper.rb +99 -0
- data/lib/common/SleepyInterface.rb +384 -0
- data/lib/common/SynInterfaces.rb +275 -0
- data/lib/common/TabFormat.rb +720 -0
- data/lib/common/Tiger.rb +1448 -0
- data/lib/common/TntInterface.rb +44 -0
- data/lib/common/Tree.rb +61 -0
- data/lib/common/TreetaggerInterface.rb +303 -0
- data/lib/common/headz.rb +338 -0
- data/lib/common/option_parser.rb +13 -0
- data/lib/common/ruby_class_extensions.rb +310 -0
- data/lib/fred/Baseline.rb +150 -0
- data/lib/fred/FileZipped.rb +31 -0
- data/lib/fred/FredBOWContext.rb +863 -0
- data/lib/fred/FredConfigData.rb +182 -0
- data/lib/fred/FredConventions.rb +232 -0
- data/lib/fred/FredDetermineTargets.rb +324 -0
- data/lib/fred/FredEval.rb +312 -0
- data/lib/fred/FredFeatureExtractors.rb +321 -0
- data/lib/fred/FredFeatures.rb +1061 -0
- data/lib/fred/FredFeaturize.rb +596 -0
- data/lib/fred/FredNumTrainingSenses.rb +27 -0
- data/lib/fred/FredParameters.rb +402 -0
- data/lib/fred/FredSplit.rb +84 -0
- data/lib/fred/FredSplitPkg.rb +180 -0
- data/lib/fred/FredTest.rb +607 -0
- data/lib/fred/FredTrain.rb +144 -0
- data/lib/fred/PlotAndREval.rb +480 -0
- data/lib/fred/fred.rb +45 -0
- data/lib/fred/md5.rb +23 -0
- data/lib/fred/opt_parser.rb +250 -0
- data/lib/frprep/AbstractSynInterface.rb +1227 -0
- data/lib/frprep/Ampersand.rb +37 -0
- data/lib/frprep/BerkeleyInterface.rb +375 -0
- data/lib/frprep/CollinsInterface.rb +1165 -0
- data/lib/frprep/ConfigData.rb +694 -0
- data/lib/frprep/Counter.rb +18 -0
- data/lib/frprep/FNCorpusXML.rb +643 -0
- data/lib/frprep/FNDatabase.rb +144 -0
- data/lib/frprep/FixSynSemMapping.rb +196 -0
- data/lib/frprep/FrPrepConfigData.rb +66 -0
- data/lib/frprep/FrameXML.rb +513 -0
- data/lib/frprep/FrprepHelper.rb +1324 -0
- data/lib/frprep/Graph.rb +345 -0
- data/lib/frprep/ISO-8859-1.rb +24 -0
- data/lib/frprep/MiniparInterface.rb +1388 -0
- data/lib/frprep/Parser.rb +213 -0
- data/lib/frprep/RegXML.rb +269 -0
- data/lib/frprep/STXmlTerminalOrder.rb +194 -0
- data/lib/frprep/SalsaTigerRegXML.rb +2347 -0
- data/lib/frprep/SalsaTigerXMLHelper.rb +99 -0
- data/lib/frprep/SleepyInterface.rb +384 -0
- data/lib/frprep/SynInterfaces.rb +275 -0
- data/lib/frprep/TabFormat.rb +720 -0
- data/lib/frprep/Tiger.rb +1448 -0
- data/lib/frprep/TntInterface.rb +44 -0
- data/lib/frprep/Tree.rb +61 -0
- data/lib/frprep/TreetaggerInterface.rb +303 -0
- data/lib/frprep/do_parses.rb +142 -0
- data/lib/frprep/frprep.rb +686 -0
- data/lib/frprep/headz.rb +338 -0
- data/lib/frprep/one_parsed_file.rb +28 -0
- data/lib/frprep/opt_parser.rb +94 -0
- data/lib/frprep/ruby_class_extensions.rb +310 -0
- data/lib/rosy/AbstractFeatureAndExternal.rb +240 -0
- data/lib/rosy/DBMySQL.rb +146 -0
- data/lib/rosy/DBSQLite.rb +280 -0
- data/lib/rosy/DBTable.rb +239 -0
- data/lib/rosy/DBWrapper.rb +176 -0
- data/lib/rosy/ExternalConfigData.rb +58 -0
- data/lib/rosy/FailedParses.rb +130 -0
- data/lib/rosy/FeatureInfo.rb +242 -0
- data/lib/rosy/GfInduce.rb +1115 -0
- data/lib/rosy/GfInduceFeature.rb +148 -0
- data/lib/rosy/InputData.rb +294 -0
- data/lib/rosy/RosyConfigData.rb +115 -0
- data/lib/rosy/RosyConfusability.rb +338 -0
- data/lib/rosy/RosyEval.rb +465 -0
- data/lib/rosy/RosyFeatureExtractors.rb +1609 -0
- data/lib/rosy/RosyFeaturize.rb +280 -0
- data/lib/rosy/RosyInspect.rb +336 -0
- data/lib/rosy/RosyIterator.rb +477 -0
- data/lib/rosy/RosyPhase2FeatureExtractors.rb +230 -0
- data/lib/rosy/RosyPruning.rb +165 -0
- data/lib/rosy/RosyServices.rb +744 -0
- data/lib/rosy/RosySplit.rb +232 -0
- data/lib/rosy/RosyTask.rb +19 -0
- data/lib/rosy/RosyTest.rb +826 -0
- data/lib/rosy/RosyTrain.rb +232 -0
- data/lib/rosy/RosyTrainingTestTable.rb +786 -0
- data/lib/rosy/TargetsMostFrequentFrame.rb +60 -0
- data/lib/rosy/View.rb +418 -0
- data/lib/rosy/opt_parser.rb +379 -0
- data/lib/rosy/rosy.rb +77 -0
- data/lib/shalmaneser/version.rb +3 -0
- data/test/frprep/test_opt_parser.rb +94 -0
- data/test/functional/functional_test_helper.rb +40 -0
- data/test/functional/sample_experiment_files/fred_test.salsa.erb +122 -0
- data/test/functional/sample_experiment_files/fred_train.salsa.erb +135 -0
- data/test/functional/sample_experiment_files/prp_test.salsa.erb +138 -0
- data/test/functional/sample_experiment_files/prp_test.salsa.fred.standalone.erb +120 -0
- data/test/functional/sample_experiment_files/prp_test.salsa.rosy.standalone.erb +120 -0
- data/test/functional/sample_experiment_files/prp_train.salsa.erb +138 -0
- data/test/functional/sample_experiment_files/prp_train.salsa.fred.standalone.erb +138 -0
- data/test/functional/sample_experiment_files/prp_train.salsa.rosy.standalone.erb +138 -0
- data/test/functional/sample_experiment_files/rosy_test.salsa.erb +257 -0
- data/test/functional/sample_experiment_files/rosy_train.salsa.erb +259 -0
- data/test/functional/test_fred.rb +47 -0
- data/test/functional/test_frprep.rb +52 -0
- data/test/functional/test_rosy.rb +20 -0
- metadata +270 -0
@@ -0,0 +1,379 @@
|
|
1
|
+
# -*- coding: utf-8 -*-
|
2
|
+
|
3
|
+
require 'getoptlong'
|
4
|
+
|
5
|
+
require 'rosy/RosyConfigData'
|
6
|
+
|
7
|
+
module Rosy

  # Command line handling for the Rosy executable.
  #
  # OptParser.parse evaluates the runtime arguments, validates them against
  # the selected task and opens the experiment file.
  class OptParser

    # Parses the command line and loads the experiment configuration.
    #
    # NOTE: +cmd_args+ is accepted for interface compatibility but is not
    # read here; GetoptLong always works on the process-wide ARGV.
    #
    # Side effects:
    # * sets the global $ENDUSER_MODE from the experiment file entry
    #   "enduser_mode";
    # * prints the help text and exits with status 0 if --help is given
    #   or no --task is given;
    # * prints a message to $stderr and exits with status 1 on an
    #   unparsable command line, an unknown task, an option that does not
    #   belong to the chosen task, a missing --expfile or an invalid
    #   experiment ID.
    #
    # Returns a two-element array [exp, opts]: the RosyConfigData object
    # and a hash mapping canonical option names to their arguments.
    def self.parse(cmd_args)

      # options that are valid for each individual task
      tasks = {
        "featurize" => [
          ['--testID', '-i', GetoptLong::REQUIRED_ARGUMENT], # test table ID, required for test, no default
          ['--dataset', '-d', GetoptLong::REQUIRED_ARGUMENT], # set to featurize: 'train' or 'test', no default
          ['--logID', '-l', GetoptLong::REQUIRED_ARGUMENT], # splitlog ID: if given, featurize this split. Cannot use both this and -d
          ['--append', '-A', GetoptLong::NO_ARGUMENT]
        ],
        "split" => [
          ['--logID', '-l', GetoptLong::REQUIRED_ARGUMENT], # splitlog ID, required, no default
          ['--trainpercent', '-r', GetoptLong::REQUIRED_ARGUMENT] # percentage training data, default: 90
        ],
        "train" => [
          ['--logID', '-l', GetoptLong::REQUIRED_ARGUMENT], # splitlog ID; if given, will train on split rather than all of main table
          ['--step', '-s', GetoptLong::REQUIRED_ARGUMENT] # classification step: 'argrec', 'arglab', 'both' (default) or 'onestep'
        ],
        "test" => [
          ['--step', '-s', GetoptLong::REQUIRED_ARGUMENT], # classification step: 'argrec', 'arglab', 'both' (default) or 'onestep'
          ['--testID', '-i', GetoptLong::REQUIRED_ARGUMENT], # test table ID: if given, test on this table
          ['--logID', '-l', GetoptLong::REQUIRED_ARGUMENT], # splitlog ID: if given, test on this split. Cannot use both this and -i
          ['--nooutput', '-N', GetoptLong::NO_ARGUMENT] # set this to prevent output of disambiguated test data
        ],
        "eval" => [
          ['--step', '-s', GetoptLong::REQUIRED_ARGUMENT], # classification step: 'argrec', 'arglab', 'both' (default) or 'onestep'
          ['--testID', '-i', GetoptLong::REQUIRED_ARGUMENT], # test table ID: if given, test on this table
          ['--logID', '-l', GetoptLong::REQUIRED_ARGUMENT]
        ],
        "inspect" => [
          ['--tables', GetoptLong::NO_ARGUMENT], # describe all tables
          ['--tablecont', GetoptLong::OPTIONAL_ARGUMENT], # describe table contents for current experiment
          ['--testID', '-i', GetoptLong::REQUIRED_ARGUMENT], # test table ID: if given, describe contents of this table
          ['--runs', GetoptLong::NO_ARGUMENT], # describe classification runs for current experiment
          ['--split', GetoptLong::REQUIRED_ARGUMENT] # list sentence IDs for given splitlog
        ],
        "services" => [
          ['--deltable', GetoptLong::REQUIRED_ARGUMENT], # delete database table
          ['--delexp', GetoptLong::NO_ARGUMENT], # delete experiment tables and files
          ['--deltables', GetoptLong::NO_ARGUMENT], # delete tables interactively
          ['--delruns', GetoptLong::NO_ARGUMENT], # delete runs
          ['--delsplit', GetoptLong::REQUIRED_ARGUMENT], # delete split
          ['--dump', GetoptLong::OPTIONAL_ARGUMENT], # dump experiment to files
          ['--load', GetoptLong::OPTIONAL_ARGUMENT], # load experiment from files
          ['--writefeatures', GetoptLong::OPTIONAL_ARGUMENT], # write feature files
          ['--step', '-s', GetoptLong::REQUIRED_ARGUMENT], # classification step: 'argrec', 'arglab', 'both' (default) or 'onestep'
          ['--testID', '-i', GetoptLong::REQUIRED_ARGUMENT], # test table ID: if given, test on this table
          ['--logID', '-l', GetoptLong::REQUIRED_ARGUMENT] # splitlog ID: if given, test on this split. Cannot use both this and -i
        ]
      }

      # options that are valid for every task
      optnames = [
        ['--help', '-h', GetoptLong::NO_ARGUMENT], # get help
        ['--expfile', '-e', GetoptLong::REQUIRED_ARGUMENT], # experiment file name (and path), no default
        ['--task', '-t', GetoptLong::REQUIRED_ARGUMENT] # task to perform: one of tasks.keys, no default
      ]

      # GetoptLong needs the union of all option declarations. Several tasks
      # declare the same option, and a repeated declaration is rejected,
      # hence the uniq!.
      tasks.each_value { |task_optnames| optnames.concat(task_optnames) }
      optnames.uniq!

      # asterisk: "explode" array into individual parameters
      begin
        opts = options_hash(GetoptLong.new(*optnames))
      rescue => e
        # Interpolate the message: concatenating the exception object itself
        # onto a String raises a TypeError and hides the real problem.
        $stderr.puts "Error: unknown command line option: #{e.message}"
        exit 1
      end

      ##
      # are we being asked for help?
      if opts['--help']
        help
        exit(0)
      end

      ##
      # now find the task
      task = opts['--task']

      # sanity checks for task
      if task.nil?
        help
        exit(0)
      end

      unless tasks.key?(task)
        $stderr.puts "Sorry, I don't know the task '#{task}'. Do 'ruby rosy.rb -h' for a list of tasks."
        exit 1
      end

      ##
      # now evaluate the rest of the options:
      # everything that is not a global option must belong to the chosen task
      opts.each_key do |opt|
        next if ['--help', '--task', '--expfile'].include?(opt)
        next if tasks[task].assoc(opt)

        $stderr.puts "Sorry, I don't know the option #{opt} for task #{task}"
        $stderr.puts "Do 'ruby rosy.rb -h' for a list of tasks and options."
        exit 1
      end

      experiment_filename = opts['--expfile']

      if experiment_filename.nil?
        $stderr.puts "I need an experiment file name, option --expfile|-e"
        exit 1
      end

      ##
      # open config file
      exp = RosyConfigData.new(experiment_filename)

      # sanity checks
      # \A and \z anchor the whole string; ^ and $ would match per line and
      # let a multi-line value through.
      unless exp.get("experiment_ID") =~ /\A[A-Za-z0-9_]+\z/
        $stderr.puts "Please choose an experiment ID consisting only of the letters A-Za-z0-9_."
        exit 1
      end

      # enduser mode?
      $ENDUSER_MODE = exp.get("enduser_mode")

      [exp, opts]
    end

    # help and options_hash are internal helpers of parse. They are left
    # publicly callable on purpose: a bare `private` does not apply to
    # `def self.` methods, and hiding them with private_class_method could
    # break callers outside this file.

    # Prints the usage text for all tasks to $stderr.
    #
    # Calls default_test_ID(), which is not defined in this class and has to
    # be provided by the surrounding program.
    def self.help
      $stderr.puts "
ROSY: semantic ROle assignment SYstem Version 0.2

Usage:

ruby rosy.rb --help|-h

  gets you this help text.

ruby rosy.rb --task|-t featurize --expfile|-e <e>
             [--dataset|-d <d>] [--testID|-i <i>]
             [--logID|-l <l> ] [--append|-A]
  featurizes input data and stores it in a database.
  Enduser mode: dataset has to be 'test' (preset as default),
  no --append.

  --expfile <e>  Use <e> as the experiment description and
                 configuration file

  --dataset <d>  Set to featurize: <d> is either 'train'
                 (put data into main table) or 'test' (put data
                 into separate test table with ID given using --testID)
                 Use at least one of --logID, --dataset.

  --logID <l>    Re-featurize the split with ID <l>:
                 Features that train on training instances are done
                 separately for each split.
                 Use at least one of --logID, --dataset.

  --testID <i>   Use <i> as the ID for the table to store the test data.
                 necessary only with '--dataset test'. default: #{default_test_ID()}.

  --append       Do not overwrite previously computed features
                 for this experiment.
                 Rather, append the new features
                 to the old featurization files.
                 Default: overwrite

ruby rosy.rb --task|-t split --expfile|-e <f> --logID|-l <l>
             [--trainpercent|-r <r>]
  produces a new train/test split on the main table of the experiment.
  Not available in enduser mode.

  --expfile <f>  Use <f> as the experiment description and configuration file

  --logID <l>    Use <l> as the ID for storing this new split

  --trainpercent <r>
                 Allocate <r> percent of the data as train,
                 and 100-<r> as test
                 default: <r>=90


ruby rosy.rb --task|-t train --expfile|-e <f> [--step|-s <s>] [--logID|-l <l>]
  train classifier(s) on the main table data (or a split of it)
  Not available in enduser mode.

  --expfile <f>  Use <f> as the experiment description and configuration file

  --step <s>     What kind of classifier(s) to train?
                 <s>=argrec: argument recognition,
                   distinguish role from nonrole
                 <s>=arglab: argument labeling, naming roles,
                   builds on argrec
                 <s>=both: first argrec, then arglab
                 <s>=onestep: do argument labeling right away without
                   prior filtering of non-arguments
                 default: both

  --logID <l>    If given, train on this split of the main table rather than
                 the whole main table


ruby rosy.rb --task|-t test --expfile|-e <f> [--step|-s <s>]
             [--logID|-l <l> | --testID|-i <i>] [--nooutput|-N]
  apply classifier(s) on data from a test table, or a main table split
  Enduser mode: only -s both, -s onestep available. Cleanup: Database with
  featurization data is removed after the run.

  --expfile <f>  Use <f> as the experiment description and configuration file

  --step <s>     What kind of classifier(s) to use for testing?
                 <s>=argrec: argument recognition,
                   distinguish role from nonrole
                 <s>=arglab: argument labeling, naming roles,
                   builds on argrec
                 <s>=both: first argrec, then arglab
                 <s>=onestep: do argument labeling right away without
                   prior filtering of non-arguments
                 default: both

  --logID <l>    If given, test on this split of the main table

  --testID <i>   If given, test on this test table.
                 (Use either this option or -l)

  --nooutput     Do not produce an output of the disambiguated test data
                 in SalsaTigerXML format. This is useful if you just want
                 to evaluate the system.
                 Default: output is produced.


ruby rosy.rb --task|-t eval --expfile|-e <f> [--step|-s <s>]
             [--logID|-l <l> | --testID|-i <i>]
  evaluate the classification results.
  Not available in enduser mode.

  --expfile <f>  Use <f> as the experiment description and configuration file

  --step <s>     Evaluate results of which classification step?
                 <s>=argrec: argument recognition,
                   distinguish role from nonrole
                 <s>=arglab: argument labeling, naming roles,
                   builds on argrec
                 <s>=both: first argrec, then arglab
                 <s>=onestep: do argument labeling right away without
                   prior filtering of non-arguments
                 default: both

  --logID <l>    If given, evaluate on the test data from this split of
                 the main table.
                 (use either this option or -i)

  --testID <i>   If given, evaluate on this test table.
                 (Use either this option or -l)


ruby rosy.rb --task|-t inspect --expfile|-e <f> [--tables] [--runs]
             [--tablecont [N]] [--testID|-i <i>] [--split <l>]
  inspect system-internal data, both global and pertaining to the current
  experiment.
  If no options are chosen, an overview of the current experiment
  is given.

  --expfile <f>  Use <f> as the experiment description and
                 configuration file

  --tables       Lists all tables of the DB: table name,column names

  --tablecont [N|id:N]
                 Lists the training instances (as feature vectors)
                 of the current experiment.
                 If test ID is given, test instances are listed as well.
                 The optional argument may have one of two forms:
                 - It may be a number N. Then only the N first lines
                   of each set are listed.
                 - It may be a pair id:N. Then only the N first lines of
                   the DB table with ID id are listed. To list all lines
                   of a single DB table, use id:

  --testID <i>   If given, --tablecont also lists the feature vectors for
                 this test table

  --runs         List all classification runs of the current experiment

  --split <l>    List the split with the given ID

ruby rosy.rb --task|-t services --expfile|-e <f> [--deltable <t>]
             [--deltables] [--delexp] [--dump [<D>]] [--load [<D>]]
             [--delruns] [--delsplit <l>] [--writefeatures [<D>]]
             [--step|-s <s>] [--testID|-i <i>] [--logID|-l <l> ]
  diverse services.
  The --del* services are not available in enduser mode.

  --dump [<D>]   Dump the database tables for the current experiment file.
                 If a directory <D> is given, the tables are written there,
                 otherwise they are written to
                 data_dir/<experiment_ID>/tables, where data_dir is the
                 data directory given in the experiment file.
                 No existing files in the directory are removed.

  --load [<D>]   Construct new database tables from the files in
                 the directory <D>, if it is given, otherwise from
                 data_dir/<experiment_id>/tables, where data_dir
                 is the data directory given in the experiment file.
                 Warning: Database tables are loaded into the
                 current experiment, the one described in the
                 experiment file. Existing data in tables with
                 the same names is overwritten!

  --deltable <t> Remove database table <t>

  --deltables    Presents all tables in the database for interactive deletion

  --delexp       Remove the experiment described in the given experiment file,
                 all its database tables and files.

  --delruns      Presents all classification runs for the current experiment
                 for interactive deletion

  --delsplit <l> Remove the split with ID <l> from the experiment
                 described in the given experiment file.

  --writefeatures [<D>]
                 Write feature files to directory <D>, such
                 that you can use them with some external machine learning
                 system. If <D> is not given, feature files are written
                 to data_dir/<experiment_id>/your_feature_files/.

                 Uses the parameters --step, --testID, --logID to
                 determine which feature files will be written.

  --step <s>     Use with --writefeatures: task for which to write features.
                 <s>=argrec: argument recognition,
                   distinguish role from nonrole
                 <s>=arglab: argument labeling, naming roles
                 <s>=onestep: do argument labeling right away without
                   prior filtering of non-arguments
                 default: onestep.

  --logID <l>    Use with --writefeatures: write features
                 for the split with ID <l>.

  --testID <i>   Use with --writefeatures: write features
                 for the test set with ID <i>.
                 default: #{default_test_ID()}.
"

    end

    ###
    # options_hash:
    #
    # GetoptLong only allows you to access options via each(),
    # not individually, and it only allows you to cycle through the options once.
    # So we re-code the options as a hash.
    #
    # opts_obj: any object whose each() yields (option, argument) pairs,
    #           normally a GetoptLong object
    #
    # Returns a Hash mapping option => argument. If an option is yielded
    # more than once, the last argument wins.
    def self.options_hash(opts_obj)
      opt_hash = {}
      opts_obj.each { |opt, arg| opt_hash[opt] = arg }
      opt_hash
    end

  end # class OptParser

end # module Rosy
|
data/lib/rosy/rosy.rb
ADDED
@@ -0,0 +1,77 @@
|
|
1
|
+
# AB: 2011-11-14
|
2
|
+
# Initial import done, need to reimplement the whole interface.
|
3
|
+
|
4
|
+
require 'common/DBInterface'
|
5
|
+
require 'rosy/RosyFeaturize'
|
6
|
+
require 'rosy/RosyTest'
|
7
|
+
require 'rosy/RosyTrain'
|
8
|
+
require 'rosy/RosyInspect'
|
9
|
+
require 'rosy/RosyEval'
|
10
|
+
require 'rosy/RosyServices'
|
11
|
+
|
12
|
+
module Rosy

  # Runs one Rosy task (featurize, split, train, test, eval, inspect or
  # services) against the feature database of an experiment.
  class Rosy

    # options: two-element array [exp, opts] -- the experiment configuration
    #          object and the hash of command line options (presumably the
    #          pair returned by Rosy::OptParser.parse; confirm with caller).
    #          opts['--task'] names the task to run.
    def initialize(options)
      @exp, @opts = options
      @task = @opts['--task']
    end

    # Opens the feature database, builds the task object selected by
    # --task and performs it.
    #
    # The database is closed in every case once it has been opened, also
    # when building the table object or the task object fails or the task
    # name is unknown.
    #
    # Raises RuntimeError "Error during task execution: ..." if the task's
    # perform method fails, and RuntimeError "Shouldn't be here" for an
    # unknown task. Errors raised while constructing the table or task
    # object propagate unchanged.
    def assign

      # make rosy directory pattern:
      # main rosy directory name (data_dir) plus subdirectory
      # named after the experiment ID
      rosy_dir_pattern = File.new_dir(@exp.get("data_dir")) + "<exp_ID>/"
      @exp.set_entry("rosy_dir", rosy_dir_pattern)

      ##
      # open database
      rosy_dir = File.new_dir(@exp.instantiate("rosy_dir",
                                               "exp_ID" => @exp.get("experiment_ID")))
      database = get_db_interface(@exp, rosy_dir, "features")

      begin
        table_obj = RosyTrainingTestTable.new(@exp, database)

        ##
        # start the actual processing,
        # according to given arguments
        task_obj = create_task(table_obj)

        # execute task; only failures of perform itself get the
        # "Error during task execution" wrapper
        begin
          task_obj.perform
        rescue => e
          fail "Error during task execution: #{e.class}=>#{e.message}"
        end
      ensure
        # release the database no matter where the processing stopped
        database.close
      end

      $stderr.puts "Rosy: done."
    end

    private

    # Instantiates the task object that belongs to @task.
    # A case statement is used so that only the constant of the selected
    # task has to be loaded.
    def create_task(table_obj)
      case @task
      when "featurize"
        RosyFeaturize.new(@exp, @opts, table_obj)
      when "split"
        RosySplit.new(@exp, @opts, table_obj)
      when "train"
        RosyTrain.new(@exp, @opts, table_obj)
      when "test"
        RosyTest.new(@exp, @opts, table_obj)
      when "eval"
        RosyEvalTask.new(@exp, @opts, table_obj)
      when "inspect"
        RosyInspect.new(@exp, @opts, table_obj)
      when "services"
        RosyServices.new(@exp, @opts, table_obj)
      else
        raise "Shouldn't be here"
      end
    end

  end # class Rosy

end # module Rosy
|