frprep 0.0.1.prealpha

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (138) hide show
  1. data/.yardopts +8 -0
  2. data/CHANGELOG.rdoc +0 -0
  3. data/LICENSE.rdoc +0 -0
  4. data/README.rdoc +0 -0
  5. data/lib/common/AbstractSynInterface.rb +1227 -0
  6. data/lib/common/BerkeleyInterface.rb +375 -0
  7. data/lib/common/CollinsInterface.rb +1165 -0
  8. data/lib/common/ConfigData.rb +694 -0
  9. data/lib/common/Counter.rb +18 -0
  10. data/lib/common/DBInterface.rb +48 -0
  11. data/lib/common/EnduserMode.rb +27 -0
  12. data/lib/common/Eval.rb +480 -0
  13. data/lib/common/FixSynSemMapping.rb +196 -0
  14. data/lib/common/FrPrepConfigData.rb +66 -0
  15. data/lib/common/FrprepHelper.rb +1324 -0
  16. data/lib/common/Graph.rb +345 -0
  17. data/lib/common/ISO-8859-1.rb +24 -0
  18. data/lib/common/ML.rb +186 -0
  19. data/lib/common/Maxent.rb +215 -0
  20. data/lib/common/MiniparInterface.rb +1388 -0
  21. data/lib/common/Optimise.rb +195 -0
  22. data/lib/common/Parser.rb +213 -0
  23. data/lib/common/RegXML.rb +269 -0
  24. data/lib/common/RosyConventions.rb +171 -0
  25. data/lib/common/SQLQuery.rb +243 -0
  26. data/lib/common/STXmlTerminalOrder.rb +194 -0
  27. data/lib/common/SalsaTigerRegXML.rb +2347 -0
  28. data/lib/common/SalsaTigerXMLHelper.rb +99 -0
  29. data/lib/common/SleepyInterface.rb +384 -0
  30. data/lib/common/SynInterfaces.rb +275 -0
  31. data/lib/common/TabFormat.rb +720 -0
  32. data/lib/common/Tiger.rb +1448 -0
  33. data/lib/common/TntInterface.rb +44 -0
  34. data/lib/common/Tree.rb +61 -0
  35. data/lib/common/TreetaggerInterface.rb +303 -0
  36. data/lib/common/headz.rb +338 -0
  37. data/lib/common/option_parser.rb +13 -0
  38. data/lib/common/ruby_class_extensions.rb +310 -0
  39. data/lib/fred/Baseline.rb +150 -0
  40. data/lib/fred/FileZipped.rb +31 -0
  41. data/lib/fred/FredBOWContext.rb +863 -0
  42. data/lib/fred/FredConfigData.rb +182 -0
  43. data/lib/fred/FredConventions.rb +232 -0
  44. data/lib/fred/FredDetermineTargets.rb +324 -0
  45. data/lib/fred/FredEval.rb +312 -0
  46. data/lib/fred/FredFeatureExtractors.rb +321 -0
  47. data/lib/fred/FredFeatures.rb +1061 -0
  48. data/lib/fred/FredFeaturize.rb +596 -0
  49. data/lib/fred/FredNumTrainingSenses.rb +27 -0
  50. data/lib/fred/FredParameters.rb +402 -0
  51. data/lib/fred/FredSplit.rb +84 -0
  52. data/lib/fred/FredSplitPkg.rb +180 -0
  53. data/lib/fred/FredTest.rb +607 -0
  54. data/lib/fred/FredTrain.rb +144 -0
  55. data/lib/fred/PlotAndREval.rb +480 -0
  56. data/lib/fred/fred.rb +45 -0
  57. data/lib/fred/md5.rb +23 -0
  58. data/lib/fred/opt_parser.rb +250 -0
  59. data/lib/frprep/AbstractSynInterface.rb +1227 -0
  60. data/lib/frprep/Ampersand.rb +37 -0
  61. data/lib/frprep/BerkeleyInterface.rb +375 -0
  62. data/lib/frprep/CollinsInterface.rb +1165 -0
  63. data/lib/frprep/ConfigData.rb +694 -0
  64. data/lib/frprep/Counter.rb +18 -0
  65. data/lib/frprep/FNCorpusXML.rb +643 -0
  66. data/lib/frprep/FNDatabase.rb +144 -0
  67. data/lib/frprep/FixSynSemMapping.rb +196 -0
  68. data/lib/frprep/FrPrepConfigData.rb +66 -0
  69. data/lib/frprep/FrameXML.rb +513 -0
  70. data/lib/frprep/FrprepHelper.rb +1324 -0
  71. data/lib/frprep/Graph.rb +345 -0
  72. data/lib/frprep/ISO-8859-1.rb +24 -0
  73. data/lib/frprep/MiniparInterface.rb +1388 -0
  74. data/lib/frprep/Parser.rb +213 -0
  75. data/lib/frprep/RegXML.rb +269 -0
  76. data/lib/frprep/STXmlTerminalOrder.rb +194 -0
  77. data/lib/frprep/SalsaTigerRegXML.rb +2347 -0
  78. data/lib/frprep/SalsaTigerXMLHelper.rb +99 -0
  79. data/lib/frprep/SleepyInterface.rb +384 -0
  80. data/lib/frprep/SynInterfaces.rb +275 -0
  81. data/lib/frprep/TabFormat.rb +720 -0
  82. data/lib/frprep/Tiger.rb +1448 -0
  83. data/lib/frprep/TntInterface.rb +44 -0
  84. data/lib/frprep/Tree.rb +61 -0
  85. data/lib/frprep/TreetaggerInterface.rb +303 -0
  86. data/lib/frprep/do_parses.rb +142 -0
  87. data/lib/frprep/frprep.rb +686 -0
  88. data/lib/frprep/headz.rb +338 -0
  89. data/lib/frprep/one_parsed_file.rb +28 -0
  90. data/lib/frprep/opt_parser.rb +94 -0
  91. data/lib/frprep/ruby_class_extensions.rb +310 -0
  92. data/lib/rosy/AbstractFeatureAndExternal.rb +240 -0
  93. data/lib/rosy/DBMySQL.rb +146 -0
  94. data/lib/rosy/DBSQLite.rb +280 -0
  95. data/lib/rosy/DBTable.rb +239 -0
  96. data/lib/rosy/DBWrapper.rb +176 -0
  97. data/lib/rosy/ExternalConfigData.rb +58 -0
  98. data/lib/rosy/FailedParses.rb +130 -0
  99. data/lib/rosy/FeatureInfo.rb +242 -0
  100. data/lib/rosy/GfInduce.rb +1115 -0
  101. data/lib/rosy/GfInduceFeature.rb +148 -0
  102. data/lib/rosy/InputData.rb +294 -0
  103. data/lib/rosy/RosyConfigData.rb +115 -0
  104. data/lib/rosy/RosyConfusability.rb +338 -0
  105. data/lib/rosy/RosyEval.rb +465 -0
  106. data/lib/rosy/RosyFeatureExtractors.rb +1609 -0
  107. data/lib/rosy/RosyFeaturize.rb +280 -0
  108. data/lib/rosy/RosyInspect.rb +336 -0
  109. data/lib/rosy/RosyIterator.rb +477 -0
  110. data/lib/rosy/RosyPhase2FeatureExtractors.rb +230 -0
  111. data/lib/rosy/RosyPruning.rb +165 -0
  112. data/lib/rosy/RosyServices.rb +744 -0
  113. data/lib/rosy/RosySplit.rb +232 -0
  114. data/lib/rosy/RosyTask.rb +19 -0
  115. data/lib/rosy/RosyTest.rb +826 -0
  116. data/lib/rosy/RosyTrain.rb +232 -0
  117. data/lib/rosy/RosyTrainingTestTable.rb +786 -0
  118. data/lib/rosy/TargetsMostFrequentFrame.rb +60 -0
  119. data/lib/rosy/View.rb +418 -0
  120. data/lib/rosy/opt_parser.rb +379 -0
  121. data/lib/rosy/rosy.rb +77 -0
  122. data/lib/shalmaneser/version.rb +3 -0
  123. data/test/frprep/test_opt_parser.rb +94 -0
  124. data/test/functional/functional_test_helper.rb +40 -0
  125. data/test/functional/sample_experiment_files/fred_test.salsa.erb +122 -0
  126. data/test/functional/sample_experiment_files/fred_train.salsa.erb +135 -0
  127. data/test/functional/sample_experiment_files/prp_test.salsa.erb +138 -0
  128. data/test/functional/sample_experiment_files/prp_test.salsa.fred.standalone.erb +120 -0
  129. data/test/functional/sample_experiment_files/prp_test.salsa.rosy.standalone.erb +120 -0
  130. data/test/functional/sample_experiment_files/prp_train.salsa.erb +138 -0
  131. data/test/functional/sample_experiment_files/prp_train.salsa.fred.standalone.erb +138 -0
  132. data/test/functional/sample_experiment_files/prp_train.salsa.rosy.standalone.erb +138 -0
  133. data/test/functional/sample_experiment_files/rosy_test.salsa.erb +257 -0
  134. data/test/functional/sample_experiment_files/rosy_train.salsa.erb +259 -0
  135. data/test/functional/test_fred.rb +47 -0
  136. data/test/functional/test_frprep.rb +52 -0
  137. data/test/functional/test_rosy.rb +20 -0
  138. metadata +270 -0
@@ -0,0 +1,379 @@
1
+ # -*- coding: utf-8 -*-
2
+
3
+ require 'getoptlong'
4
+
5
+ require 'rosy/RosyConfigData'
6
+
7
module Rosy

  # Command-line option handling for the Rosy executable.
  #
  # OptParser.parse reads the command line through GetoptLong, validates the
  # chosen task and its options, loads the experiment file and hands back the
  # experiment object together with the parsed options.
  class OptParser

    # Options accepted regardless of the chosen task.
    GENERAL_OPTIONS = [
      ['--help', '-h', GetoptLong::NO_ARGUMENT],          # get help
      ['--expfile', '-e', GetoptLong::REQUIRED_ARGUMENT], # experiment file name (and path), no default
      ['--task', '-t', GetoptLong::REQUIRED_ARGUMENT]     # task to perform: one of TASK_OPTIONS.keys, no default
    ].freeze

    # Task name => GetoptLong option specifications valid for that task.
    TASK_OPTIONS = {
      "featurize" => [
        ['--testID', '-i', GetoptLong::REQUIRED_ARGUMENT],  # test table ID, required for test, no default
        ['--dataset', '-d', GetoptLong::REQUIRED_ARGUMENT], # set to featurize: 'train' or 'test', no default
        ['--logID', '-l', GetoptLong::REQUIRED_ARGUMENT],   # splitlog ID: if given, featurize this split. Cannot use both this and -d
        ['--append', '-A', GetoptLong::NO_ARGUMENT]
      ],
      "split" => [
        ['--logID', '-l', GetoptLong::REQUIRED_ARGUMENT],       # splitlog ID, required, no default
        ['--trainpercent', '-r', GetoptLong::REQUIRED_ARGUMENT] # percentage training data, default: 90
      ],
      "train" => [
        ['--logID', '-l', GetoptLong::REQUIRED_ARGUMENT], # splitlog ID; if given, will train on split rather than all of main table
        ['--step', '-s', GetoptLong::REQUIRED_ARGUMENT]   # classification step: 'argrec', 'arglab', 'both' (default) or 'onestep'
      ],
      "test" => [
        ['--step', '-s', GetoptLong::REQUIRED_ARGUMENT],   # classification step: 'argrec', 'arglab', 'both' (default) or 'onestep'
        ['--testID', '-i', GetoptLong::REQUIRED_ARGUMENT], # test table ID: if given, test on this table
        ['--logID', '-l', GetoptLong::REQUIRED_ARGUMENT],  # splitlog ID: if given, test on this split. Cannot use both this and -i
        ['--nooutput', '-N', GetoptLong::NO_ARGUMENT]      # set this to prevent output of disambiguated test data
      ],
      "eval" => [
        ['--step', '-s', GetoptLong::REQUIRED_ARGUMENT],   # classification step: 'argrec', 'arglab', 'both' (default) or 'onestep'
        ['--testID', '-i', GetoptLong::REQUIRED_ARGUMENT], # test table ID: if given, test on this table
        ['--logID', '-l', GetoptLong::REQUIRED_ARGUMENT]
      ],
      "inspect" => [
        ['--tables', GetoptLong::NO_ARGUMENT],             # describe all tables
        ['--tablecont', GetoptLong::OPTIONAL_ARGUMENT],    # describe table contents for current experiment
        ['--testID', '-i', GetoptLong::REQUIRED_ARGUMENT], # test table ID: if given, describe contents of this table
        ['--runs', GetoptLong::NO_ARGUMENT],               # describe classification runs for current experiment
        ['--split', GetoptLong::REQUIRED_ARGUMENT]         # list sentence IDs for given splitlog
      ],
      "services" => [
        ['--deltable', GetoptLong::REQUIRED_ARGUMENT],      # delete database table
        ['--delexp', GetoptLong::NO_ARGUMENT],              # delete experiment tables and files
        ['--deltables', GetoptLong::NO_ARGUMENT],           # delete tables interactively
        ['--delruns', GetoptLong::NO_ARGUMENT],             # delete runs
        ['--delsplit', GetoptLong::REQUIRED_ARGUMENT],      # delete split
        ['--dump', GetoptLong::OPTIONAL_ARGUMENT],          # dump experiment to files
        ['--load', GetoptLong::OPTIONAL_ARGUMENT],          # load experiment from files
        ['--writefeatures', GetoptLong::OPTIONAL_ARGUMENT], # write feature files
        ['--step', '-s', GetoptLong::REQUIRED_ARGUMENT],    # classification step: 'argrec', 'arglab', 'both' (default) or 'onestep'
        ['--testID', '-i', GetoptLong::REQUIRED_ARGUMENT],  # test table ID: if given, test on this table
        ['--logID', '-l', GetoptLong::REQUIRED_ARGUMENT]    # splitlog ID: if given, test on this split. Cannot use both this and -i
      ]
    }.freeze

    # Options that are handled by parse itself and are valid for every task.
    TASK_INDEPENDENT = ['--help', '--task', '--expfile'].freeze

    ##
    # Evaluates the runtime arguments.
    #
    # cmd_args:: accepted for interface compatibility only. It is not read:
    #            GetoptLong always parses the global ARGV.
    #            NOTE(review): callers presumably pass ARGV itself -- confirm.
    #
    # Returns the pair [exp, opts]: exp is the RosyConfigData object built
    # from the --expfile argument, opts a Hash from canonical option name
    # to its argument.
    #
    # Terminates the process via exit: status 0 after printing the help text
    # (--help given, or no --task given), status 1 after reporting a usage
    # error on $stderr. Also sets the global $ENDUSER_MODE from the
    # experiment file entry "enduser_mode".
    def self.parse(cmd_args)
      # Several tasks share identical option specs; GetoptLong rejects
      # redefinitions, so duplicates have to be dropped first.
      optnames = (GENERAL_OPTIONS + TASK_OPTIONS.values.flatten(1)).uniq

      begin
        # asterisk: "explode" array into individual parameters
        opts = options_hash(GetoptLong.new(*optnames))
      rescue GetoptLong::Error => e
        # Interpolate the message: concatenating the exception object itself
        # onto a String raises TypeError and hides the real problem.
        $stderr.puts "Error: unknown command line option: #{e.message}"
        exit 1
      end

      ##
      # are we being asked for help?
      if opts['--help']
        help
        exit(0)
      end

      ##
      # now find the task
      task = opts['--task']
      if task.nil?
        help
        exit(0)
      end
      unless TASK_OPTIONS.key?(task)
        $stderr.puts "Sorry, I don't know the task '#{task}'. Do 'ruby rosy.rb -h' for a list of tasks."
        exit 1
      end

      ##
      # every remaining option has to belong to the chosen task
      opts.each_key do |opt|
        next if TASK_INDEPENDENT.include?(opt)
        next if TASK_OPTIONS[task].assoc(opt)

        $stderr.puts "Sorry, I don't know the option " + opt + " for task " + task
        $stderr.puts "Do 'ruby rosy.rb -h' for a list of tasks and options."
        exit 1
      end

      experiment_filename = opts['--expfile']
      if experiment_filename.nil?
        $stderr.puts "I need an experiment file name, option --expfile|-e"
        exit 1
      end

      ##
      # open config file
      exp = RosyConfigData.new(experiment_filename)

      # sanity check: \A and \z anchor the whole string, whereas ^ and $
      # would accept a multi-line ID as soon as one of its lines is valid.
      unless exp.get("experiment_ID") =~ /\A[A-Za-z0-9_]+\z/
        $stderr.puts "Please choose an experiment ID consisting only of the letters A-Za-z0-9_."
        exit 1
      end

      # enduser mode?
      $ENDUSER_MODE = exp.get("enduser_mode")

      [exp, opts]
    end

    # NOTE(review): the original placed a bare `private` here. That keyword
    # does not affect `def self.` methods, so help and options_hash have
    # always been publicly callable; they are left public to keep the
    # interface unchanged. Use private_class_method once callers are checked.

    ##
    # Prints the usage text for all tasks to $stderr.
    #
    # The text interpolates default_test_ID(), which is defined outside
    # this file.
    def self.help
      $stderr.puts "
ROSY: semantic ROle assignment SYstem Version 0.2

Usage:

ruby rosy.rb --help|-h

gets you this help text.

ruby rosy.rb --task|-t featurize --expfile|-e <e>
[--dataset|-d <d>] [--testID|-i <i>]
[--logID|-l <l> ] [--append|-A]
featurizes input data and stores it in a database.
Enduser mode: dataset has to be 'test' (preset as default),
no --append.

--expfile <e> Use <e> as the experiment description and
configuration file

--dataset <d> Set to featurize: <d> is either 'train'
(put data into main table) or 'test' (put data
into separate test table with ID given using --testID)
Use at least one of --logID, --dataset.

--logID <l> Re-featurize the split with ID <l>:
Features that train on training instances are done
separately for each split.
Use at least one of --logID, --dataset.

--testID <i> Use <i> as the ID for the table to store the test data.
necessary only with '--dataset test'. default: #{default_test_ID()}.

--append Do not overwrite previously computed features
for this experiment.
Rather, append the new features
to the old featurization files.
Default: overwrite

ruby rosy.rb --task|-t split --expfile|-e <f> --logID|-l <l>
[--trainpercent|-r <r>]
produces a new train/test split on the main table of the experiment.
Not available in enduser mode.

--expfile <f> Use <f> as the experiment description and configuration file

--logID <l> Use <l> as the ID for storing this new split

--trainpercent <r> Allocate <r> percent of the data as train,
and 100-<r> as test
default: <r>=90


ruby rosy.rb --task|-t train --expfile|-e <f> [--step|-s <s>] [--logID|-l <l>]
train classifier(s) on the main table data (or a split of it)
Not available in enduser mode.

--expfile <f> Use <f> as the experiment description and configuration file

--step <s> What kind of classifier(s) to train?
<s>=argrec: argument recognition,
distinguish role from nonrole
<s>=arglab: argument labeling, naming roles,
builds on argrec
<s>=both: first argrec, then arglab
<s>=onestep: do argument labeling right away without
prior filtering of non-arguments
default: both

--logID <l> If given, train on this split of the main table rather than
the whole main table


ruby rosy.rb --task|-t test --expfile|-e <f> [--step|-s <s>]
[--logID|-l <l> | --testID|-i <i>] [--nooutput|-N]
apply classifier(s) on data from a test table, or a main table split
Enduser mode: only -s both, -s onestep available. Cleanup: Database with
featurization data is removed after the run.

--expfile <f> Use <f> as the experiment description and configuration file

--step <s> What kind of classifier(s) to use for testing?
<s>=argrec: argument recognition,
distinguish role from nonrole
<s>=arglab: argument labeling, naming roles,
builds on argrec
<s>=both: first argrec, then arglab
<s>=onestep: do argument labeling right away without
prior filtering of non-arguments
default: both
--logID <l> If given, test on this split of the main table

--testID <i> If given, test on this test table.
(Use either this option or -l)

--nooutput Do not produce an output of the disambiguated test data
in SalsaTigerXML format. This is useful if you just want
to evaluate the system.
Default: output is produced.


ruby rosy.rb --task|-t eval --expfile|-e <f> [--step|-s <s>]
[--logID|-l <l> | --testID|-i <i>]
evaluate the classification results.
Not available in enduser mode.

--expfile <f> Use <f> as the experiment description and configuration file

--step <s> Evaluate results of which classification step?
<s>=argrec: argument recognition,
distinguish role from nonrole
<s>=arglab: argument labeling, naming roles,
builds on argrec
<s>=both: first argrec, then arglab
<s>=onestep: do argument labeling right away without
prior filtering of non-arguments
default: both

--logID <l> If given, evaluate on the test data from this split of
the main table.
(Use either this option or -i)

--testID <i> If given, evaluate on this test table.
(Use either this option or -l)


ruby rosy.rb --task|-t inspect --expfile|-e <f> [--tables] [--runs]
[--tablecont [N]] [--testID|-i <i>] [--split <l>]
inspect system-internal data, both global and pertaining to the current
experiment.
If no options are chosen, an overview of the current experiment
is given.

--expfile <f> Use <f> as the experiment description and
configuration file

--tables Lists all tables of the DB: table name,column names

--tablecont [N|id:N] Lists the training instances (as feature vectors)
of the current experiment.
If test ID is given, test instances are listed as well.
The optional argument may have one of two forms:
- It may be a number N. Then only the N first lines
of each set are listed.
- It may be a pair id:N. Then only the N first lines of
the DB table with ID id are listed. To list all lines
of a single DB table, use id:

--testID <i> If given, --tablecont also lists the feature vectors for
this test table

--runs List all classification runs of the current experiment

--split <l> List the split with the given ID

ruby rosy.rb --task|-t services --expfile|-e <f> [--deltable <t>]
[--delexp] [--deltables] [--delruns] [--dump [<D>]] [--load [<D>]]
[--delsplit <l>] [--writefeatures [<D>]]
[--step|-s <s>] [--testID|-i <i>] [--logID|-l <l> ]
diverse services.
The --del* services are not available in enduser mode.

--dump [<D>] Dump the database tables for the current experiment file.
If a directory <D> is given, the tables are written there,
otherwise they are written to
data_dir/<experiment_ID>/tables, where data_dir is the
data directory given in the experiment file.
No existing files in the directory are removed.

--load [<D>] Construct new database tables from the files in
the directory <D>, if it is given, otherwise from
data_dir/<experiment_id>/tables, where data_dir
is the data directory given in the experiment file.
Warning: Database tables are loaded into the
current experiment, the one described in the
experiment file. Existing data in tables with
the same names is overwritten!

--deltable <t> Remove database table <t>

--deltables Presents all tables in the database for interactive deletion

--delexp Remove the experiment described in the given experiment file,
all its database tables and files.

--delruns Presents all classification runs for the current experiment
for interactive deletion

--delsplit <l> Remove the split with ID <l> from the experiment
described in the given experiment file.

--writefeatures [<D>] Write feature files to directory <D>, such
that you can use them with some external machine learning
system. If <D> is not given, feature files are written
to data_dir/<experiment_id>/your_feature_files/.

Uses the parameters --step, --testID, --logID to
determine which feature files will be written.

--step <s> Use with --writefeatures: task for which to write features.
<s>=argrec: argument recognition,
distinguish role from nonrole
<s>=arglab: argument labeling, naming roles
<s>=onestep: do argument labeling right away without
prior filtering of non-arguments
default: onestep.

--logID <l> Use with --writefeatures: write features
for the split with ID <l>.

--testID <i> Use with --writefeatures: write features
for the test set with ID <i>.
default: #{default_test_ID()}.
"

    end

    ###
    # options_hash:
    #
    # GetoptLong only allows you to access options via each(),
    # not individually, and it only allows you to cycle through the options once.
    # So we re-code the options as a hash.
    #
    # opts_obj:: GetoptLong object; it is consumed by this call.
    #
    # Returns a Hash mapping each option name yielded by opts_obj to its
    # argument. Errors raised by GetoptLong while iterating propagate to
    # the caller.
    def self.options_hash(opts_obj)
      opt_hash = {}
      opts_obj.each { |opt, arg| opt_hash[opt] = arg }
      opt_hash
    end

  end # class OptParser

end # module Rosy
@@ -0,0 +1,77 @@
1
+ # AB: 2011-11-14
2
+ # Initial import done, need to reimplement the whole interface.
3
+
4
+ require 'common/DBInterface'
5
+ require 'rosy/RosyFeaturize'
6
+ require 'rosy/RosyTest'
7
+ require 'rosy/RosyTrain'
8
+ require 'rosy/RosyInspect'
9
+ require 'rosy/RosyEval'
10
+ require 'rosy/RosyServices'
11
+
12
+ module Rosy
13
+ class Rosy
14
+
15
+ def initialize(options)
16
+ @exp, @opts = options
17
+ @task = @opts['--task']
18
+ end
19
+
20
+ def assign
21
+
22
+ # make rosy directory pattern:
23
+ # main rosy directory name (data_dir) plus subdirectory
24
+ # named after the experiment ID
25
+ rosy_dir_pattern = File.new_dir(@exp.get("data_dir")) + "<exp_ID>/"
26
+ @exp.set_entry("rosy_dir", rosy_dir_pattern)
27
+
28
+ ##
29
+ # open database
30
+
31
+ rosy_dir = File.new_dir(@exp.instantiate("rosy_dir",
32
+ "exp_ID" => @exp.get("experiment_ID")))
33
+ database = get_db_interface(@exp, rosy_dir, "features")
34
+
35
+ table_obj = RosyTrainingTestTable.new(@exp, database)
36
+
37
+ ##
38
+ # start the actual processing,
39
+ # according to given arguments
40
+
41
+ # initialize task object
42
+ #begin
43
+ case @task
44
+ when "featurize"
45
+ task_obj = RosyFeaturize.new(@exp, @opts, table_obj)
46
+ when "split"
47
+ task_obj = RosySplit.new(@exp, @opts, table_obj)
48
+ when "train"
49
+ task_obj = RosyTrain.new(@exp, @opts, table_obj)
50
+ when "test"
51
+ task_obj = RosyTest.new(@exp, @opts, table_obj)
52
+ when "eval"
53
+ task_obj = RosyEvalTask.new(@exp, @opts, table_obj)
54
+ when "inspect"
55
+ task_obj = RosyInspect.new(@exp, @opts, table_obj)
56
+ when "services"
57
+ task_obj = RosyServices.new(@exp, @opts, table_obj)
58
+ else
59
+ raise "Shouldn't be here"
60
+ end
61
+
62
+
63
+ # execute task
64
+ begin
65
+ task_obj.perform
66
+ rescue => e
67
+ fail "Error during task execution: #{e.class}=>#{e.message}"
68
+ ensure
69
+ database.close
70
+ end
71
+
72
+
73
+ $stderr.puts "Rosy: done."
74
+ end
75
+
76
+ end # class Rosy
77
+ end # module Rosy