frprep 0.0.1.prealpha
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.yardopts +8 -0
- data/CHANGELOG.rdoc +0 -0
- data/LICENSE.rdoc +0 -0
- data/README.rdoc +0 -0
- data/lib/common/AbstractSynInterface.rb +1227 -0
- data/lib/common/BerkeleyInterface.rb +375 -0
- data/lib/common/CollinsInterface.rb +1165 -0
- data/lib/common/ConfigData.rb +694 -0
- data/lib/common/Counter.rb +18 -0
- data/lib/common/DBInterface.rb +48 -0
- data/lib/common/EnduserMode.rb +27 -0
- data/lib/common/Eval.rb +480 -0
- data/lib/common/FixSynSemMapping.rb +196 -0
- data/lib/common/FrPrepConfigData.rb +66 -0
- data/lib/common/FrprepHelper.rb +1324 -0
- data/lib/common/Graph.rb +345 -0
- data/lib/common/ISO-8859-1.rb +24 -0
- data/lib/common/ML.rb +186 -0
- data/lib/common/Maxent.rb +215 -0
- data/lib/common/MiniparInterface.rb +1388 -0
- data/lib/common/Optimise.rb +195 -0
- data/lib/common/Parser.rb +213 -0
- data/lib/common/RegXML.rb +269 -0
- data/lib/common/RosyConventions.rb +171 -0
- data/lib/common/SQLQuery.rb +243 -0
- data/lib/common/STXmlTerminalOrder.rb +194 -0
- data/lib/common/SalsaTigerRegXML.rb +2347 -0
- data/lib/common/SalsaTigerXMLHelper.rb +99 -0
- data/lib/common/SleepyInterface.rb +384 -0
- data/lib/common/SynInterfaces.rb +275 -0
- data/lib/common/TabFormat.rb +720 -0
- data/lib/common/Tiger.rb +1448 -0
- data/lib/common/TntInterface.rb +44 -0
- data/lib/common/Tree.rb +61 -0
- data/lib/common/TreetaggerInterface.rb +303 -0
- data/lib/common/headz.rb +338 -0
- data/lib/common/option_parser.rb +13 -0
- data/lib/common/ruby_class_extensions.rb +310 -0
- data/lib/fred/Baseline.rb +150 -0
- data/lib/fred/FileZipped.rb +31 -0
- data/lib/fred/FredBOWContext.rb +863 -0
- data/lib/fred/FredConfigData.rb +182 -0
- data/lib/fred/FredConventions.rb +232 -0
- data/lib/fred/FredDetermineTargets.rb +324 -0
- data/lib/fred/FredEval.rb +312 -0
- data/lib/fred/FredFeatureExtractors.rb +321 -0
- data/lib/fred/FredFeatures.rb +1061 -0
- data/lib/fred/FredFeaturize.rb +596 -0
- data/lib/fred/FredNumTrainingSenses.rb +27 -0
- data/lib/fred/FredParameters.rb +402 -0
- data/lib/fred/FredSplit.rb +84 -0
- data/lib/fred/FredSplitPkg.rb +180 -0
- data/lib/fred/FredTest.rb +607 -0
- data/lib/fred/FredTrain.rb +144 -0
- data/lib/fred/PlotAndREval.rb +480 -0
- data/lib/fred/fred.rb +45 -0
- data/lib/fred/md5.rb +23 -0
- data/lib/fred/opt_parser.rb +250 -0
- data/lib/frprep/AbstractSynInterface.rb +1227 -0
- data/lib/frprep/Ampersand.rb +37 -0
- data/lib/frprep/BerkeleyInterface.rb +375 -0
- data/lib/frprep/CollinsInterface.rb +1165 -0
- data/lib/frprep/ConfigData.rb +694 -0
- data/lib/frprep/Counter.rb +18 -0
- data/lib/frprep/FNCorpusXML.rb +643 -0
- data/lib/frprep/FNDatabase.rb +144 -0
- data/lib/frprep/FixSynSemMapping.rb +196 -0
- data/lib/frprep/FrPrepConfigData.rb +66 -0
- data/lib/frprep/FrameXML.rb +513 -0
- data/lib/frprep/FrprepHelper.rb +1324 -0
- data/lib/frprep/Graph.rb +345 -0
- data/lib/frprep/ISO-8859-1.rb +24 -0
- data/lib/frprep/MiniparInterface.rb +1388 -0
- data/lib/frprep/Parser.rb +213 -0
- data/lib/frprep/RegXML.rb +269 -0
- data/lib/frprep/STXmlTerminalOrder.rb +194 -0
- data/lib/frprep/SalsaTigerRegXML.rb +2347 -0
- data/lib/frprep/SalsaTigerXMLHelper.rb +99 -0
- data/lib/frprep/SleepyInterface.rb +384 -0
- data/lib/frprep/SynInterfaces.rb +275 -0
- data/lib/frprep/TabFormat.rb +720 -0
- data/lib/frprep/Tiger.rb +1448 -0
- data/lib/frprep/TntInterface.rb +44 -0
- data/lib/frprep/Tree.rb +61 -0
- data/lib/frprep/TreetaggerInterface.rb +303 -0
- data/lib/frprep/do_parses.rb +142 -0
- data/lib/frprep/frprep.rb +686 -0
- data/lib/frprep/headz.rb +338 -0
- data/lib/frprep/one_parsed_file.rb +28 -0
- data/lib/frprep/opt_parser.rb +94 -0
- data/lib/frprep/ruby_class_extensions.rb +310 -0
- data/lib/rosy/AbstractFeatureAndExternal.rb +240 -0
- data/lib/rosy/DBMySQL.rb +146 -0
- data/lib/rosy/DBSQLite.rb +280 -0
- data/lib/rosy/DBTable.rb +239 -0
- data/lib/rosy/DBWrapper.rb +176 -0
- data/lib/rosy/ExternalConfigData.rb +58 -0
- data/lib/rosy/FailedParses.rb +130 -0
- data/lib/rosy/FeatureInfo.rb +242 -0
- data/lib/rosy/GfInduce.rb +1115 -0
- data/lib/rosy/GfInduceFeature.rb +148 -0
- data/lib/rosy/InputData.rb +294 -0
- data/lib/rosy/RosyConfigData.rb +115 -0
- data/lib/rosy/RosyConfusability.rb +338 -0
- data/lib/rosy/RosyEval.rb +465 -0
- data/lib/rosy/RosyFeatureExtractors.rb +1609 -0
- data/lib/rosy/RosyFeaturize.rb +280 -0
- data/lib/rosy/RosyInspect.rb +336 -0
- data/lib/rosy/RosyIterator.rb +477 -0
- data/lib/rosy/RosyPhase2FeatureExtractors.rb +230 -0
- data/lib/rosy/RosyPruning.rb +165 -0
- data/lib/rosy/RosyServices.rb +744 -0
- data/lib/rosy/RosySplit.rb +232 -0
- data/lib/rosy/RosyTask.rb +19 -0
- data/lib/rosy/RosyTest.rb +826 -0
- data/lib/rosy/RosyTrain.rb +232 -0
- data/lib/rosy/RosyTrainingTestTable.rb +786 -0
- data/lib/rosy/TargetsMostFrequentFrame.rb +60 -0
- data/lib/rosy/View.rb +418 -0
- data/lib/rosy/opt_parser.rb +379 -0
- data/lib/rosy/rosy.rb +77 -0
- data/lib/shalmaneser/version.rb +3 -0
- data/test/frprep/test_opt_parser.rb +94 -0
- data/test/functional/functional_test_helper.rb +40 -0
- data/test/functional/sample_experiment_files/fred_test.salsa.erb +122 -0
- data/test/functional/sample_experiment_files/fred_train.salsa.erb +135 -0
- data/test/functional/sample_experiment_files/prp_test.salsa.erb +138 -0
- data/test/functional/sample_experiment_files/prp_test.salsa.fred.standalone.erb +120 -0
- data/test/functional/sample_experiment_files/prp_test.salsa.rosy.standalone.erb +120 -0
- data/test/functional/sample_experiment_files/prp_train.salsa.erb +138 -0
- data/test/functional/sample_experiment_files/prp_train.salsa.fred.standalone.erb +138 -0
- data/test/functional/sample_experiment_files/prp_train.salsa.rosy.standalone.erb +138 -0
- data/test/functional/sample_experiment_files/rosy_test.salsa.erb +257 -0
- data/test/functional/sample_experiment_files/rosy_train.salsa.erb +259 -0
- data/test/functional/test_fred.rb +47 -0
- data/test/functional/test_frprep.rb +52 -0
- data/test/functional/test_rosy.rb +20 -0
- metadata +270 -0
@@ -0,0 +1,379 @@
|
|
1
|
+
# -*- coding: utf-8 -*-
|
2
|
+
|
3
|
+
require 'getoptlong'
|
4
|
+
|
5
|
+
require 'rosy/RosyConfigData'
|
6
|
+
|
7
|
+
module Rosy
|
8
|
+
|
9
|
+
class OptParser
|
10
|
+
# Evaluate the Rosy command line and load the experiment file.
#
# NOTE: GetoptLong always reads the process-wide ARGV. +cmd_args+ is kept
# for interface compatibility but is not consulted by this method.
#
# cmd_args:: command line arguments (see note above)
#
# Returns a two-element array <tt>[exp, opts]</tt>: the RosyConfigData object
# built from the --expfile argument, and a hash mapping each option name
# that was given to its argument.
#
# Terminates the process instead of returning in these cases:
# * status 0 after printing the help text (--help given, or no --task)
# * status 1 for an unparsable command line, an unknown task, an option
#   that does not belong to the chosen task, a missing --expfile, or an
#   experiment ID containing anything but letters, digits and underscores.
#
# Side effect: sets the global $ENDUSER_MODE from the experiment file.
def self.parse(cmd_args)
  required = GetoptLong::REQUIRED_ARGUMENT
  optional = GetoptLong::OPTIONAL_ARGUMENT
  flag     = GetoptLong::NO_ARGUMENT

  ##
  # options that are specific to one task
  tasks = {
    "featurize" => [
      ['--testID', '-i', required], # test table ID, required for test, no default
      ['--dataset', '-d', required], # set to featurize: 'train' or 'test', no default
      ['--logID', '-l', required], # splitlog ID: if given, featurize this split. Cannot use both this and -d
      ['--append', '-A', flag]
    ],
    "split" => [
      ['--logID', '-l', required], # splitlog ID, required, no default
      ['--trainpercent', '-r', required] # percentage training data, default: 90
    ],
    "train" => [
      ['--logID', '-l', required], # splitlog ID; if given, will train on split rather than all of main table
      ['--step', '-s', required] # classification step: 'argrec', 'arglab', 'both' (default) or 'onestep'
    ],
    "test" => [
      ['--step', '-s', required], # classification step: 'argrec', 'arglab', 'both' (default) or 'onestep'
      ['--testID', '-i', required], # test table ID: if given, test on this table
      ['--logID', '-l', required], # splitlog ID: if given, test on this split. Cannot use both this and -i
      ['--nooutput', '-N', flag] # set this to prevent output of disambiguated test data
    ],
    "eval" => [
      ['--step', '-s', required], # classification step: 'argrec', 'arglab', 'both' (default) or 'onestep'
      ['--testID', '-i', required], # test table ID: if given, test on this table
      ['--logID', '-l', required]
    ],
    "inspect" => [
      ['--tables', flag], # describe all tables
      ['--tablecont', optional], # describe table contents for current experiment
      ['--testID', '-i', required], # test table ID: if given, describe contents of this table
      ['--runs', flag], # describe classification runs for current experiment
      ['--split', required] # list sentence IDs for given splitlog
    ],
    "services" => [
      ['--deltable', required], # delete database table
      ['--delexp', flag], # delete experiment tables and files
      ['--deltables', flag], # delete tables interactively
      ['--delruns', flag], # delete runs
      ['--delsplit', required], # delete split
      ['--dump', optional], # dump experiment to files
      ['--load', optional], # load experiment from files
      ['--writefeatures', optional], # write feature files
      ['--step', '-s', required], # classification step: 'argrec', 'arglab', 'both' (default) or 'onestep'
      ['--testID', '-i', required], # test table ID: if given, test on this table
      ['--logID', '-l', required] # splitlog ID: if given, test on this split. Cannot use both this and -i
    ]
  }

  ##
  # options that are valid for every task
  general_optnames = [
    ['--help', '-h', flag], # get help
    ['--expfile', '-e', required], # experiment file name (and path), no default
    ['--task', '-t', required] # task to perform: one of tasks.keys, no default
  ]

  # Several tasks share identical option specs; GetoptLong must see each
  # spec only once, hence the uniq.
  optnames = (general_optnames + tasks.values.flatten(1)).uniq

  # asterisk: "explode" array into individual parameters
  begin
    opts = options_hash(GetoptLong.new(*optnames))
  rescue GetoptLong::Error => e
    # Interpolate the message: concatenating the exception object itself
    # onto a String raises TypeError and hides the real problem.
    $stderr.puts "Error: unknown command line option: #{e.message}"
    exit 1
  end

  ##
  # are we being asked for help?
  if opts['--help']
    help
    exit(0)
  end

  ##
  # now find the task
  task = opts['--task']

  # sanity checks for task
  if task.nil?
    help
    exit(0)
  end

  unless tasks.key?(task)
    $stderr.puts "Sorry, I don't know the task '#{task}'. Do 'ruby rosy.rb -h' for a list of tasks."
    exit 1
  end

  ##
  # now evaluate the rest of the options:
  # everything that is not a general option has to belong to the chosen task
  opts.each_key do |opt|
    next if ['--help', '--task', '--expfile'].include?(opt)
    next if tasks[task].assoc(opt)

    $stderr.puts "Sorry, I don't know the option #{opt} for task #{task}"
    $stderr.puts "Do 'ruby rosy.rb -h' for a list of tasks and options."
    exit 1
  end

  experiment_filename = opts['--expfile']
  if experiment_filename.nil?
    $stderr.puts "I need an experiment file name, option --expfile|-e"
    exit 1
  end

  ##
  # open config file
  exp = RosyConfigData.new(experiment_filename)

  # sanity checks
  # \A and \z anchor the whole string; ^ and $ only anchor single lines and
  # would accept a multi-line value whose first line happens to look valid.
  unless exp.get("experiment_ID") =~ /\A[A-Za-z0-9_]+\z/
    $stderr.puts "Please choose an experiment ID consisting only of the letters A-Za-z0-9_."
    exit 1
  end

  # enduser mode?
  $ENDUSER_MODE = exp.get("enduser_mode")

  [exp, opts]
end
|
141
|
+
|
142
|
+
private
|
143
|
+
# Print the usage text for all Rosy tasks to $stderr.
#
# The text interpolates default_test_ID(), which is not defined in this
# file and has to be provided by the surrounding program.
#
# The synopsis lines are kept in step with the option tables in parse():
# only options that GetoptLong is actually told about are advertised.
def self.help
  $stderr.puts <<~HELP

    ROSY: semantic ROle assignment SYstem Version 0.2

    Usage:

    ruby rosy.rb --help|-h

    gets you this help text.

    ruby rosy.rb --task|-t featurize --expfile|-e <e>
    [--dataset|-d <d>] [--testID|-i <i>]
    [--logID|-l <l> ] [--append|-A]
    featurizes input data and stores it in a database.
    Enduser mode: dataset has to be 'test' (preset as default),
    no --append.

    --expfile <e> Use <e> as the experiment description and
    configuration file

    --dataset <d> Set to featurize: <d> is either 'train'
    (put data into main table) or 'test' (put data
    into separate test table with ID given using --testID)
    Use at least one of --logID, --dataset.

    --logID <l> Re-featurize the split with ID <l>:
    Features that train on training instances are done
    separately for each split.
    Use at least one of --logID, --dataset.

    --testID <i> Use <i> as the ID for the table to store the test data.
    necessary only with '--dataset test'. default: #{default_test_ID}.

    --append Do not overwrite previously computed features
    for this experiment.
    Rather, append the new features
    to the old featurization files.
    Default: overwrite

    ruby rosy.rb --task|-t split --expfile|-e <f> --logID|-l <l>
    [--trainpercent|-r <r>]
    produces a new train/test split on the main table of the experiment.
    Not available in enduser mode.

    --expfile <f> Use <f> as the experiment description and configuration file

    --logID <l> Use <l> as the ID for storing this new split

    --trainpercent <r> Allocate <r> percent of the data as train,
    and 100-<r> as test
    default: <r>=90


    ruby rosy.rb --task|-t train --expfile|-e <f> [--step|-s <s>] [--logID|-l <l>]
    train classifier(s) on the main table data (or a split of it)
    Not available in enduser mode.

    --expfile <f> Use <f> as the experiment description and configuration file

    --step <s> What kind of classifier(s) to train?
    <s>=argrec: argument recognition,
    distinguish role from nonrole
    <s>=arglab: argument labeling, naming roles,
    builds on argrec
    <s>=both: first argrec, then arglab
    <s>=onestep: do argument labeling right away without
    prior filtering of non-arguments
    default: both

    --logID <l> If given, train on this split of the main table rather than
    the whole main table


    ruby rosy.rb --task|-t test --expfile|-e <f> [--step|-s <s>]
    [--logID|-l <l> | --testID|-i <i>] [--nooutput|-N]
    apply classifier(s) on data from a test table, or a main table split
    Enduser mode: only -s both, -s onestep available. Cleanup: Database with
    featurization data is removed after the run.

    --expfile <f> Use <f> as the experiment description and configuration file

    --step <s> What kind of classifier(s) to use for testing?
    <s>=argrec: argument recognition,
    distinguish role from nonrole
    <s>=arglab: argument labeling, naming roles,
    builds on argrec
    <s>=both: first argrec, then arglab
    <s>=onestep: do argument labeling right away without
    prior filtering of non-arguments
    default: both
    --logID <l> If given, test on this split of the main table

    --testID <i> If given, test on this test table.
    (Use either this option or -l)

    --nooutput Do not produce an output of the disambiguated test data
    in SalsaTigerXML format. This is useful if you just want
    to evaluate the system.
    Default: output is produced.


    ruby rosy.rb --task|-t eval --expfile|-e <f> [--step|-s <s>]
    [--logID|-l <l> | --testID|-i <i>]
    evaluate the classification results.
    Not available in enduser mode.

    --expfile <f> Use <f> as the experiment description and configuration file

    --step <s> Evaluate results of which classification step?
    <s>=argrec: argument recognition,
    distinguish role from nonrole
    <s>=arglab: argument labeling, naming roles,
    builds on argrec
    <s>=both: first argrec, then arglab
    <s>=onestep: do argument labeling right away without
    prior filtering of non-arguments
    default: both

    --logID <l> If given, evaluate on the test data from this split of
    the main table.
    (use either this option or -i)

    --testID <i> If given, evaluate on this test table.
    (Use either this option or -l)


    ruby rosy.rb --task|-t inspect --expfile|-e <f> [--tables] [--runs]
    [--tablecont [N]] [--testID|-i <i>] [--split <l>]
    inspect system-internal data, both global and pertaining to the current
    experiment.
    If no options are chosen, an overview of the current experiment
    is given.

    --expfile <f> Use <f> as the experiment description and
    configuration file

    --tables Lists all tables of the DB: table name,column names

    --tablecont [N|id:N] Lists the training instances (as feature vectors)
    of the current experiment.
    If test ID is given, test instances are listed as well.
    The optional argument may have one of two forms:
    - It may be a number N. Then only the N first lines
    of each set are listed.
    - It may be a pair id:N. Then only the N first lines of
    the DB table with ID id are listed. To list all lines
    of a single DB table, use id:

    --testID <i> If given, --tablecont also lists the feature vectors for
    this test table

    --runs List all classification runs of the current experiment

    --split <l> List the split with the given ID

    ruby rosy.rb --task|-t services --expfile|-e <f> [--deltable <t>]
    [--delexp] [--deltables] [--delruns] [--dump [<D>]] [--load [<D>]]
    [--delsplit <l>] [--writefeatures [<D>]]
    [--step|-s <s>] [--testID|-i <i>] [--logID|-l <l> ]
    diverse services.
    The --del* services are not available in enduser mode.

    --dump [<D>] Dump the database tables for the current experiment file.
    If a directory <D> is given, the tables are written there,
    otherwise they are written to
    data_dir/<experiment_ID>/tables, where data_dir is the
    data directory given in the experiment file.
    No existing files in the directory are removed.

    --load [<D>] Construct new database tables from the files in
    the directory <D>, if it is given, otherwise from
    data_dir/<experiment_id>/tables, where data_dir
    is the data directory given in the experiment file.
    Warning: Database tables are loaded into the
    current experiment, the one described in the
    experiment file. Existing data in tables with
    the same names is overwritten!

    --deltable <t> Remove database table <t>

    --deltables Presents all tables in the database for interactive deletion

    --delexp Remove the experiment described in the given experiment file,
    all its database tables and files.

    --delruns Presents all classification runs for the current experiment
    for interactive deletion

    --delsplit <l> Remove the split with ID <l> from the experiment
    described in the given experiment file.

    --writefeatures [<D>] Write feature files to directory <D>, such
    that you can use them with some external machine learning
    system. If <D> is not given, feature files are written
    to data_dir/<experiment_id>/your_feature_files/.

    Uses the parameters --step, --testID, --logID to
    determine which feature files will be written.

    --step <s> Use with --writefeatures: task for which to write features.
    <s>=argrec: argument recognition,
    distinguish role from nonrole
    <s>=arglab: argument labeling, naming roles
    <s>=onestep: do argument labeling right away without
    prior filtering of non-arguments
    default: onestep.

    --logID <l> Use with --writefeatures: write features
    for the split with ID <l>.

    --testID <i> Use with --writefeatures: write features
    for the test set with ID <i>.
    default: #{default_test_ID}.
  HELP
end
|
360
|
+
|
361
|
+
###
# options_hash:
#
# A GetoptLong object hands out its options only through each(), and only
# in a single pass. To allow lookups by option name afterwards, the
# (option, argument) pairs are copied into a plain Hash.
#
# opts_obj:: GetoptLong object (anything whose each() yields option/argument pairs)
#
# Returns a Hash mapping option name => argument; when an option is yielded
# more than once, the last argument wins.
def self.options_hash(opts_obj) # GetoptLong object
  {}.tap do |collected|
    opts_obj.each { |name, value| collected[name] = value }
  end
end
|
376
|
+
|
377
|
+
end # class OptParser
|
378
|
+
|
379
|
+
end # module Rosy
|
data/lib/rosy/rosy.rb
ADDED
@@ -0,0 +1,77 @@
|
|
1
|
+
# AB: 2011-11-14
|
2
|
+
# Initial import done, need to reimplement the whole interface.
|
3
|
+
|
4
|
+
require 'common/DBInterface'
|
5
|
+
require 'rosy/RosyFeaturize'
|
6
|
+
require 'rosy/RosyTest'
|
7
|
+
require 'rosy/RosyTrain'
|
8
|
+
require 'rosy/RosyInspect'
|
9
|
+
require 'rosy/RosyEval'
|
10
|
+
require 'rosy/RosyServices'
|
11
|
+
|
12
|
+
module Rosy
|
13
|
+
# Entry point that runs one Rosy task (featurize, split, train, test, eval,
# inspect or services) for an experiment.
class Rosy

  # options:: two-element array [exp, opts]: the experiment configuration
  #           object and the hash of command line options. The task name is
  #           read from opts['--task'].
  def initialize(options)
    @exp, @opts = options
    @task = @opts['--task']
  end

  # Open the feature database, build the object for the requested task and
  # run it.
  #
  # The database handle is closed on every exit path once it has been
  # opened, including failures while the task object is being constructed.
  #
  # Raises RuntimeError "Error during task execution: ..." when the task's
  # perform method fails, and RuntimeError "Shouldn't be here" for a task
  # name that is not known. Errors raised while constructing the table or
  # task object are passed on unchanged.
  def assign
    # make rosy directory pattern:
    # main rosy directory name (data_dir) plus subdirectory
    # named after the experiment ID
    rosy_dir_pattern = File.new_dir(@exp.get("data_dir")) + "<exp_ID>/"
    @exp.set_entry("rosy_dir", rosy_dir_pattern)

    ##
    # open database
    rosy_dir = File.new_dir(@exp.instantiate("rosy_dir",
                                             "exp_ID" => @exp.get("experiment_ID")))
    database = get_db_interface(@exp, rosy_dir, "features")

    begin
      table_obj = RosyTrainingTestTable.new(@exp, database)

      ##
      # start the actual processing,
      # according to given arguments
      task_obj = create_task(table_obj)

      # execute task
      begin
        task_obj.perform
      rescue => e
        raise "Error during task execution: #{e.class}=>#{e.message}"
      end
    ensure
      # Covers setup failures as well as failures in perform.
      database.close
    end

    $stderr.puts "Rosy: done."
  end

  private

  # Build the task object that belongs to @task.
  def create_task(table_obj)
    case @task
    when "featurize"
      RosyFeaturize.new(@exp, @opts, table_obj)
    when "split"
      RosySplit.new(@exp, @opts, table_obj)
    when "train"
      RosyTrain.new(@exp, @opts, table_obj)
    when "test"
      RosyTest.new(@exp, @opts, table_obj)
    when "eval"
      RosyEvalTask.new(@exp, @opts, table_obj)
    when "inspect"
      RosyInspect.new(@exp, @opts, table_obj)
    when "services"
      RosyServices.new(@exp, @opts, table_obj)
    else
      raise "Shouldn't be here"
    end
  end

end # class Rosy
|
77
|
+
end # module Rosy
|