shalmaneser 0.0.1.alpha

Sign up to get free protection for your applications and to get access to all the features.
Files changed (138) hide show
  1. data/.yardopts +8 -0
  2. data/CHANGELOG.rdoc +0 -0
  3. data/LICENSE.rdoc +0 -0
  4. data/README.rdoc +0 -0
  5. data/lib/common/AbstractSynInterface.rb +1227 -0
  6. data/lib/common/BerkeleyInterface.rb +375 -0
  7. data/lib/common/CollinsInterface.rb +1165 -0
  8. data/lib/common/ConfigData.rb +694 -0
  9. data/lib/common/Counter.rb +18 -0
  10. data/lib/common/DBInterface.rb +48 -0
  11. data/lib/common/EnduserMode.rb +27 -0
  12. data/lib/common/Eval.rb +480 -0
  13. data/lib/common/FixSynSemMapping.rb +196 -0
  14. data/lib/common/FrPrepConfigData.rb +66 -0
  15. data/lib/common/FrprepHelper.rb +1324 -0
  16. data/lib/common/Graph.rb +345 -0
  17. data/lib/common/ISO-8859-1.rb +24 -0
  18. data/lib/common/ML.rb +186 -0
  19. data/lib/common/Maxent.rb +215 -0
  20. data/lib/common/MiniparInterface.rb +1388 -0
  21. data/lib/common/Optimise.rb +195 -0
  22. data/lib/common/Parser.rb +213 -0
  23. data/lib/common/RegXML.rb +269 -0
  24. data/lib/common/RosyConventions.rb +171 -0
  25. data/lib/common/SQLQuery.rb +243 -0
  26. data/lib/common/STXmlTerminalOrder.rb +194 -0
  27. data/lib/common/SalsaTigerRegXML.rb +2347 -0
  28. data/lib/common/SalsaTigerXMLHelper.rb +99 -0
  29. data/lib/common/SleepyInterface.rb +384 -0
  30. data/lib/common/SynInterfaces.rb +275 -0
  31. data/lib/common/TabFormat.rb +720 -0
  32. data/lib/common/Tiger.rb +1448 -0
  33. data/lib/common/TntInterface.rb +44 -0
  34. data/lib/common/Tree.rb +61 -0
  35. data/lib/common/TreetaggerInterface.rb +303 -0
  36. data/lib/common/headz.rb +338 -0
  37. data/lib/common/option_parser.rb +13 -0
  38. data/lib/common/ruby_class_extensions.rb +310 -0
  39. data/lib/fred/Baseline.rb +150 -0
  40. data/lib/fred/FileZipped.rb +31 -0
  41. data/lib/fred/FredBOWContext.rb +863 -0
  42. data/lib/fred/FredConfigData.rb +182 -0
  43. data/lib/fred/FredConventions.rb +232 -0
  44. data/lib/fred/FredDetermineTargets.rb +324 -0
  45. data/lib/fred/FredEval.rb +312 -0
  46. data/lib/fred/FredFeatureExtractors.rb +321 -0
  47. data/lib/fred/FredFeatures.rb +1061 -0
  48. data/lib/fred/FredFeaturize.rb +596 -0
  49. data/lib/fred/FredNumTrainingSenses.rb +27 -0
  50. data/lib/fred/FredParameters.rb +402 -0
  51. data/lib/fred/FredSplit.rb +84 -0
  52. data/lib/fred/FredSplitPkg.rb +180 -0
  53. data/lib/fred/FredTest.rb +607 -0
  54. data/lib/fred/FredTrain.rb +144 -0
  55. data/lib/fred/PlotAndREval.rb +480 -0
  56. data/lib/fred/fred.rb +45 -0
  57. data/lib/fred/md5.rb +23 -0
  58. data/lib/fred/opt_parser.rb +250 -0
  59. data/lib/frprep/AbstractSynInterface.rb +1227 -0
  60. data/lib/frprep/Ampersand.rb +37 -0
  61. data/lib/frprep/BerkeleyInterface.rb +375 -0
  62. data/lib/frprep/CollinsInterface.rb +1165 -0
  63. data/lib/frprep/ConfigData.rb +694 -0
  64. data/lib/frprep/Counter.rb +18 -0
  65. data/lib/frprep/FNCorpusXML.rb +643 -0
  66. data/lib/frprep/FNDatabase.rb +144 -0
  67. data/lib/frprep/FixSynSemMapping.rb +196 -0
  68. data/lib/frprep/FrPrepConfigData.rb +66 -0
  69. data/lib/frprep/FrameXML.rb +513 -0
  70. data/lib/frprep/FrprepHelper.rb +1324 -0
  71. data/lib/frprep/Graph.rb +345 -0
  72. data/lib/frprep/ISO-8859-1.rb +24 -0
  73. data/lib/frprep/MiniparInterface.rb +1388 -0
  74. data/lib/frprep/Parser.rb +213 -0
  75. data/lib/frprep/RegXML.rb +269 -0
  76. data/lib/frprep/STXmlTerminalOrder.rb +194 -0
  77. data/lib/frprep/SalsaTigerRegXML.rb +2347 -0
  78. data/lib/frprep/SalsaTigerXMLHelper.rb +99 -0
  79. data/lib/frprep/SleepyInterface.rb +384 -0
  80. data/lib/frprep/SynInterfaces.rb +275 -0
  81. data/lib/frprep/TabFormat.rb +720 -0
  82. data/lib/frprep/Tiger.rb +1448 -0
  83. data/lib/frprep/TntInterface.rb +44 -0
  84. data/lib/frprep/Tree.rb +61 -0
  85. data/lib/frprep/TreetaggerInterface.rb +303 -0
  86. data/lib/frprep/do_parses.rb +142 -0
  87. data/lib/frprep/frprep.rb +686 -0
  88. data/lib/frprep/headz.rb +338 -0
  89. data/lib/frprep/one_parsed_file.rb +28 -0
  90. data/lib/frprep/opt_parser.rb +94 -0
  91. data/lib/frprep/ruby_class_extensions.rb +310 -0
  92. data/lib/rosy/AbstractFeatureAndExternal.rb +240 -0
  93. data/lib/rosy/DBMySQL.rb +146 -0
  94. data/lib/rosy/DBSQLite.rb +280 -0
  95. data/lib/rosy/DBTable.rb +239 -0
  96. data/lib/rosy/DBWrapper.rb +176 -0
  97. data/lib/rosy/ExternalConfigData.rb +58 -0
  98. data/lib/rosy/FailedParses.rb +130 -0
  99. data/lib/rosy/FeatureInfo.rb +242 -0
  100. data/lib/rosy/GfInduce.rb +1115 -0
  101. data/lib/rosy/GfInduceFeature.rb +148 -0
  102. data/lib/rosy/InputData.rb +294 -0
  103. data/lib/rosy/RosyConfigData.rb +115 -0
  104. data/lib/rosy/RosyConfusability.rb +338 -0
  105. data/lib/rosy/RosyEval.rb +465 -0
  106. data/lib/rosy/RosyFeatureExtractors.rb +1609 -0
  107. data/lib/rosy/RosyFeaturize.rb +280 -0
  108. data/lib/rosy/RosyInspect.rb +336 -0
  109. data/lib/rosy/RosyIterator.rb +477 -0
  110. data/lib/rosy/RosyPhase2FeatureExtractors.rb +230 -0
  111. data/lib/rosy/RosyPruning.rb +165 -0
  112. data/lib/rosy/RosyServices.rb +744 -0
  113. data/lib/rosy/RosySplit.rb +232 -0
  114. data/lib/rosy/RosyTask.rb +19 -0
  115. data/lib/rosy/RosyTest.rb +826 -0
  116. data/lib/rosy/RosyTrain.rb +232 -0
  117. data/lib/rosy/RosyTrainingTestTable.rb +786 -0
  118. data/lib/rosy/TargetsMostFrequentFrame.rb +60 -0
  119. data/lib/rosy/View.rb +418 -0
  120. data/lib/rosy/opt_parser.rb +379 -0
  121. data/lib/rosy/rosy.rb +77 -0
  122. data/lib/shalmaneser/version.rb +3 -0
  123. data/test/frprep/test_opt_parser.rb +94 -0
  124. data/test/functional/functional_test_helper.rb +40 -0
  125. data/test/functional/sample_experiment_files/fred_test.salsa.erb +122 -0
  126. data/test/functional/sample_experiment_files/fred_train.salsa.erb +135 -0
  127. data/test/functional/sample_experiment_files/prp_test.salsa.erb +138 -0
  128. data/test/functional/sample_experiment_files/prp_test.salsa.fred.standalone.erb +120 -0
  129. data/test/functional/sample_experiment_files/prp_test.salsa.rosy.standalone.erb +120 -0
  130. data/test/functional/sample_experiment_files/prp_train.salsa.erb +138 -0
  131. data/test/functional/sample_experiment_files/prp_train.salsa.fred.standalone.erb +138 -0
  132. data/test/functional/sample_experiment_files/prp_train.salsa.rosy.standalone.erb +138 -0
  133. data/test/functional/sample_experiment_files/rosy_test.salsa.erb +257 -0
  134. data/test/functional/sample_experiment_files/rosy_train.salsa.erb +259 -0
  135. data/test/functional/test_fred.rb +47 -0
  136. data/test/functional/test_frprep.rb +52 -0
  137. data/test/functional/test_rosy.rb +20 -0
  138. metadata +284 -0
@@ -0,0 +1,379 @@
1
+ # -*- coding: utf-8 -*-
2
+
3
+ require 'getoptlong'
4
+
5
+ require 'rosy/RosyConfigData'
6
+
7
+ module Rosy
8
+
9
+ class OptParser
10
# Parse the Rosy command line and load the experiment file it names.
#
# The options of all tasks are registered with GetoptLong; afterwards every
# option that was actually supplied is checked against the option table of
# the selected task, so options belonging to a different task are rejected.
#
# @param cmd_args [Array<String>] command-line arguments.
#   NOTE(review): this parameter is never read in this method. GetoptLong
#   is constructed without it and, per the GetoptLong documentation,
#   consumes ARGV. Confirm that callers always pass ARGV.
# @return [Array] two elements: the RosyConfigData object built from the
#   --expfile argument, and the Hash of supplied options
#   (long option name => argument)
#
# Prints to $stderr and terminates the process via exit: status 0 after
# showing the help text (--help given or --task missing), status 1 for every
# usage error. Sets $ENDUSER_MODE from the experiment entry "enduser_mode".
def self.parse(cmd_args)
  required = GetoptLong::REQUIRED_ARGUMENT
  optional = GetoptLong::OPTIONAL_ARGUMENT
  flag     = GetoptLong::NO_ARGUMENT

  # Options accepted by each task.
  tasks = {
    "featurize" => [
      ['--testID', '-i', required],  # test table ID, required for test, no default
      ['--dataset', '-d', required], # set to featurize: 'train' or 'test', no default
      ['--logID', '-l', required],   # splitlog ID: if given, featurize this split. Cannot use both this and -d
      ['--append', '-A', flag]
    ],
    "split" => [
      ['--logID', '-l', required],       # splitlog ID, required, no default
      ['--trainpercent', '-r', required] # percentage training data, default: 90
    ],
    "train" => [
      ['--logID', '-l', required], # splitlog ID; if given, will train on split rather than all of main table
      ['--step', '-s', required]   # classification step: 'argrec', 'arglab', 'both' (default) or 'onestep'
    ],
    "test" => [
      ['--step', '-s', required],   # classification step: 'argrec', 'arglab', 'both' (default) or 'onestep'
      ['--testID', '-i', required], # test table ID: if given, test on this table
      ['--logID', '-l', required],  # splitlog ID: if given, test on this split. Cannot use both this and -i
      ['--nooutput', '-N', flag]    # set this to prevent output of disambiguated test data
    ],
    "eval" => [
      ['--step', '-s', required],   # classification step: 'argrec', 'arglab', 'both' (default) or 'onestep'
      ['--testID', '-i', required], # test table ID: if given, test on this table
      ['--logID', '-l', required]
    ],
    "inspect" => [
      ['--tables', flag],           # describe all tables
      ['--tablecont', optional],    # describe table contents for current experiment
      ['--testID', '-i', required], # test table ID: if given, describe contents of this table
      ['--runs', flag],             # describe classification runs for current experiment
      ['--split', required]         # list sentence IDs for given splitlog
    ],
    "services" => [
      ['--deltable', required],      # delete database table
      ['--delexp', flag],            # delete experiment tables and files
      ['--deltables', flag],         # delete tables interactively
      ['--delruns', flag],           # delete runs
      ['--delsplit', required],      # delete split
      ['--dump', optional],          # dump experiment to files
      ['--load', optional],          # load experiment from files
      ['--writefeatures', optional], # write feature files
      ['--step', '-s', required],    # classification step: 'argrec', 'arglab', 'both' (default) or 'onestep'
      ['--testID', '-i', required],  # test table ID: if given, test on this table
      ['--logID', '-l', required]    # splitlog ID: if given, test on this split. Cannot use both this and -i
    ]
  }

  # Options that are valid for every task.
  optnames = [
    ['--help', '-h', flag],        # get help
    ['--expfile', '-e', required], # experiment file name (and path), no default
    ['--task', '-t', required]     # task to perform: one of tasks.keys, no default
  ]

  # Several tasks share identical option specs; register each spec only once.
  tasks.each_value { |task_optnames| optnames.concat(task_optnames) }
  optnames.uniq!

  begin
    # splat: hand each option spec to GetoptLong as a separate argument
    opts = options_hash(GetoptLong.new(*optnames))
  rescue StandardError => e
    # Interpolate the message: concatenating a String with the exception
    # object itself raises TypeError and would hide the real problem.
    $stderr.puts "Error: unknown command line option: #{e.message}"
    exit 1
  end

  # Help is shown both on request and when no task was named.
  task = opts['--task']
  if opts['--help'] || task.nil?
    help
    exit(0)
  end

  unless tasks.key?(task)
    $stderr.puts "Sorry, I don't know the task '#{task}'. Do 'ruby rosy.rb -h' for a list of tasks."
    exit 1
  end

  # Every remaining option must belong to the chosen task.
  global_options = ['--help', '--task', '--expfile']
  opts.each_key do |opt|
    next if global_options.include?(opt) || tasks[task].assoc(opt)

    $stderr.puts "Sorry, I don't know the option #{opt} for task #{task}"
    $stderr.puts "Do 'ruby rosy.rb -h' for a list of tasks and options."
    exit 1
  end

  experiment_filename = opts['--expfile']
  if experiment_filename.nil?
    $stderr.puts "I need an experiment file name, option --expfile|-e"
    exit 1
  end

  # open config file
  exp = RosyConfigData.new(experiment_filename)

  # Sanity check. \A and \z anchor the whole value; ^ and $ would only
  # anchor single lines and let multi-line IDs slip through.
  unless exp.get("experiment_ID") =~ /\A[A-Za-z0-9_]+\z/
    $stderr.puts "Please choose an experiment ID consisting only of the letters A-Za-z0-9_."
    exit 1
  end

  # enduser mode?
  $ENDUSER_MODE = exp.get("enduser_mode")

  [exp, opts]
end
141
+
142
+ private
143
# Print the usage text for all Rosy tasks to $stderr.
#
# The text interpolates default_test_ID(), which is not defined in this
# file and must be in scope when the method is called.
#
# The usage text is kept in sync with the option table in OptParser.parse:
# the eval usage line closes its bracket, the services usage names the
# argument-less --delruns and --deltables options, --writefeatures shows
# its argument as optional, and no unregistered options are mentioned.
#
# NOTE(review): a bare `private` before a `def self.` method does not make
# it private, so this method remains publicly callable.
#
# @return [nil]
def self.help
  $stderr.puts "
ROSY: semantic ROle assignment SYstem Version 0.2

Usage:

ruby rosy.rb --help|-h

gets you this help text.

ruby rosy.rb --task|-t featurize --expfile|-e <e>
[--dataset|-d <d>] [--testID|-i <i>]
[--logID|-l <l> ] [--append|-A]
featurizes input data and stores it in a database.
Enduser mode: dataset has to be 'test' (preset as default),
no --append.

--expfile <e> Use <e> as the experiment description and
configuration file

--dataset <d> Set to featurize: <d> is either 'train'
(put data into main table) or 'test' (put data
into separate test table with ID given using --testID)
Use at least one of --logID, --dataset.

--logID <l> Re-featurize the split with ID <l>:
Features that train on training instances are done
separately for each split.
Use at least one of --logID, --dataset.

--testID <i> Use <i> as the ID for the table to store the test data.
necessary only with '--dataset test'. default: #{default_test_ID()}.

--append Do not overwrite previously computed features
for this experiment.
Rather, append the new features
to the old featurization files.
Default: overwrite

ruby rosy.rb --task|-t split --expfile|-e <f> --logID|-l <l>
[--trainpercent|-r <r>]
produces a new train/test split on the main table of the experiment.
Not available in enduser mode.

--expfile <f> Use <f> as the experiment description and configuration file

--logID <l> Use <l> as the ID for storing this new split

--trainpercent <r> Allocate <r> percent of the data as train,
and 100-<r> as test
default: <r>=90


ruby rosy.rb --task|-t train --expfile|-e <f> [--step|-s <s>] [--logID|-l <l>]
train classifier(s) on the main table data (or a split of it)
Not available in enduser mode.

--expfile <f> Use <f> as the experiment description and configuration file

--step <s> What kind of classifier(s) to train?
<s>=argrec: argument recognition,
distinguish role from nonrole
<s>=arglab: argument labeling, naming roles,
builds on argrec
<s>=both: first argrec, then arglab
<s>=onestep: do argument labeling right away without
prior filtering of non-arguments
default: both

--logID <l> If given, train on this split of the main table rather than
the whole main table


ruby rosy.rb --task|-t test --expfile|-e <f> [--step|-s <s>]
[--logID|-l <l> | --testID|-i <i>] [--nooutput|-N]
apply classifier(s) on data from a test table, or a main table split
Enduser mode: only -s both, -s onestep available. Cleanup: Database with
featurization data is removed after the run.

--expfile <f> Use <f> as the experiment description and configuration file

--step <s> What kind of classifier(s) to use for testing?
<s>=argrec: argument recognition,
distinguish role from nonrole
<s>=arglab: argument labeling, naming roles,
builds on argrec
<s>=both: first argrec, then arglab
<s>=onestep: do argument labeling right away without
prior filtering of non-arguments
default: both
--logID <l> If given, test on this split of the main table

--testID <i> If given, test on this test table.
(Use either this option or -l)

--nooutput Do not produce an output of the disambiguated test data
in SalsaTigerXML format. This is useful if you just want
to evaluate the system.
Default: output is produced.


ruby rosy.rb --task|-t eval --expfile|-e <f> [--step|-s <s>]
[--logID|-l <l> | --testID|-i <i>]
evaluate the classification results.
Not available in enduser mode.

--expfile <f> Use <f> as the experiment description and configuration file

--step <s> Evaluate results of which classification step?
<s>=argrec: argument recognition,
distinguish role from nonrole
<s>=arglab: argument labeling, naming roles,
builds on argrec
<s>=both: first argrec, then arglab
<s>=onestep: do argument labeling right away without
prior filtering of non-arguments
default: both

--logID <l> If given, evaluate on the test data from this split of
the main table.
(use either this option or -i)

--testID <i> If given, evaluate on this test table.
(Use either this option or -l)


ruby rosy.rb --task|-t inspect --expfile|-e <f> [--tables] [--runs]
[--tablecont [N]] [--testID|-i <i>] [--split <l>]
inspect system-internal data, both global and pertaining to the current
experiment.
If no options are chosen, an overview of the current experiment
is given.

--expfile <f> Use <f> as the experiment description and
configuration file

--tables Lists all tables of the DB: table name,column names

--tablecont [N|id:N] Lists the training instances (as feature vectors)
of the current experiment.
If test ID is given, test instances are listed as well.
The optional argument may have one of two forms:
- It may be a number N. Then only the N first lines
of each set are listed.
- It may be a pair id:N. Then only the N first lines of
the DB table with ID id are listed. To list all lines
of a single DB table, use id:

--testID <i> If given, --tablecont also lists the feature vectors for
this test table

--runs List all classification runs of the current experiment

--split <l> List the split with the given ID

ruby rosy.rb --task|-t services --expfile|-e <f> [--deltable <t>]
[--delexp] [--dump [<D>]] [--load [<D>]] [--delruns]
[--deltables] [--delsplit <l>] [--writefeatures [<D>]]
[--step|-s <s>] [--testID|-i <i>] [--logID|-l <l> ]
diverse services.
The --del* services are not available in enduser mode.

--dump [<D>] Dump the database tables for the current experiment file.
If a directory <D> is given, the tables are written there,
otherwise they are written to
data_dir/<experiment_ID>/tables, where data_dir is the
data directory given in the experiment file.
No existing files in the directory are removed.

--load [<D>] Construct new database tables from the files in
the directory <D>, if it is given, otherwise from
data_dir/<experiment_id>/tables, where data_dir
is the data directory given in the experiment file.
Warning: Database tables are loaded into the
current experiment, the one described in the
experiment file. Existing data in tables with
the same names is overwritten!

--deltable <t> Remove database table <t>

--deltables Presents all tables in the database for interactive deletion

--delexp Remove the experiment described in the given experiment file,
all its database tables and files.

--delruns Presents all classification runs for the current experiment
for interactive deletion

--delsplit <l> Remove the split with ID <l> from the experiment
described in the given experiment file.

--writefeatures [<D>] Write feature files to directory <D>, such
that you can use them with some external machine learning
system. If <D> is not given, feature files are written
to data_dir/<experiment_id>/your_feature_files/.

Uses the parameters --step, --testID, --logID to
determine which feature files will be written.

--step <s> Use with --writefeatures: task for which to write features.
<s>=argrec: argument recognition,
distinguish role from nonrole
<s>=arglab: argument labeling, naming roles
<s>=onestep: do argument labeling right away without
prior filtering of non-arguments
default: onestep.

--logID <l> Use with --writefeatures: write features
for the split with ID <l>.

--testID <i> Use with --writefeatures: write features
for the test set with ID <i>.
default: #{default_test_ID()}.
"

end
360
+
361
+ ###
362
+ # options_hash:
363
+ #
364
+ # GetoptLong only allows you to access options via each(),
365
+ # not individually, and it only allows you to cycle through the options once.
366
+ # So we re-code the options as a hash
367
def self.options_hash(opts_obj)
  # opts_obj only needs an each() that yields (option, argument) pairs,
  # as a GetoptLong object does. Copy the pairs into a plain Hash so
  # callers can look options up by name, repeatedly.
  {}.tap do |by_name|
    opts_obj.each { |name, value| by_name[name] = value }
  end
end
376
+
377
+ end # class OptParser
378
+
379
+ end # module Rosy
data/lib/rosy/rosy.rb ADDED
@@ -0,0 +1,77 @@
1
+ # AB: 2011-11-14
2
+ # Initial import done, need to reimplement the whole interface.
3
+
4
+ require 'common/DBInterface'
5
+ require 'rosy/RosyFeaturize'
6
+ require 'rosy/RosyTest'
7
+ require 'rosy/RosyTrain'
8
+ require 'rosy/RosyInspect'
9
+ require 'rosy/RosyEval'
10
+ require 'rosy/RosyServices'
11
+
12
+ module Rosy
13
+ class Rosy
14
+
15
# options: a two-element list, [experiment configuration, option hash].
# The requested task name is read from the option hash entry '--task'.
def initialize(options)
  experiment, cli_options = options
  @exp = experiment
  @opts = cli_options
  @task = cli_options['--task']
end
19
+
20
# Run the task named in @task against the experiment's "features" database.
#
# Registers the entry "rosy_dir" in the experiment configuration, opens the
# database through get_db_interface, builds the training/test table object
# and the task object for @task, and calls the task's perform method.
#
# The database handle is closed on every path after it has been opened,
# including failures while the table or task object is being constructed.
#
# @return [nil]
# @raise [RuntimeError] "Error during task execution: ..." when perform
#   raises a StandardError, or "Shouldn't be here" for an unknown @task.
#   Exceptions raised while constructing the table or task object propagate
#   unchanged.
def assign
  # rosy directory pattern: main rosy directory name (data_dir) plus a
  # subdirectory named after the experiment ID
  rosy_dir_pattern = File.new_dir(@exp.get("data_dir")) + "<exp_ID>/"
  @exp.set_entry("rosy_dir", rosy_dir_pattern)

  # open database
  rosy_dir = File.new_dir(@exp.instantiate("rosy_dir",
                                           "exp_ID" => @exp.get("experiment_ID")))
  database = get_db_interface(@exp, rosy_dir, "features")

  begin
    table_obj = RosyTrainingTestTable.new(@exp, database)

    # initialize the task object according to the requested task
    task_obj =
      case @task
      when "featurize" then RosyFeaturize.new(@exp, @opts, table_obj)
      when "split"     then RosySplit.new(@exp, @opts, table_obj)
      when "train"     then RosyTrain.new(@exp, @opts, table_obj)
      when "test"      then RosyTest.new(@exp, @opts, table_obj)
      when "eval"      then RosyEvalTask.new(@exp, @opts, table_obj)
      when "inspect"   then RosyInspect.new(@exp, @opts, table_obj)
      when "services"  then RosyServices.new(@exp, @opts, table_obj)
      else raise "Shouldn't be here"
      end

    # execute task; only failures of perform itself are re-labelled
    begin
      task_obj.perform
    rescue => e
      fail "Error during task execution: #{e.class}=>#{e.message}"
    end
  ensure
    # release the handle no matter where the failure happened
    database.close
  end

  $stderr.puts "Rosy: done."
end
75
+
76
+ end # class Rosy
77
+ end # module Rosy