frprep 0.0.1.prealpha

Sign up to get free protection for your applications and to get access to all the features.
Files changed (138) hide show
  1. data/.yardopts +8 -0
  2. data/CHANGELOG.rdoc +0 -0
  3. data/LICENSE.rdoc +0 -0
  4. data/README.rdoc +0 -0
  5. data/lib/common/AbstractSynInterface.rb +1227 -0
  6. data/lib/common/BerkeleyInterface.rb +375 -0
  7. data/lib/common/CollinsInterface.rb +1165 -0
  8. data/lib/common/ConfigData.rb +694 -0
  9. data/lib/common/Counter.rb +18 -0
  10. data/lib/common/DBInterface.rb +48 -0
  11. data/lib/common/EnduserMode.rb +27 -0
  12. data/lib/common/Eval.rb +480 -0
  13. data/lib/common/FixSynSemMapping.rb +196 -0
  14. data/lib/common/FrPrepConfigData.rb +66 -0
  15. data/lib/common/FrprepHelper.rb +1324 -0
  16. data/lib/common/Graph.rb +345 -0
  17. data/lib/common/ISO-8859-1.rb +24 -0
  18. data/lib/common/ML.rb +186 -0
  19. data/lib/common/Maxent.rb +215 -0
  20. data/lib/common/MiniparInterface.rb +1388 -0
  21. data/lib/common/Optimise.rb +195 -0
  22. data/lib/common/Parser.rb +213 -0
  23. data/lib/common/RegXML.rb +269 -0
  24. data/lib/common/RosyConventions.rb +171 -0
  25. data/lib/common/SQLQuery.rb +243 -0
  26. data/lib/common/STXmlTerminalOrder.rb +194 -0
  27. data/lib/common/SalsaTigerRegXML.rb +2347 -0
  28. data/lib/common/SalsaTigerXMLHelper.rb +99 -0
  29. data/lib/common/SleepyInterface.rb +384 -0
  30. data/lib/common/SynInterfaces.rb +275 -0
  31. data/lib/common/TabFormat.rb +720 -0
  32. data/lib/common/Tiger.rb +1448 -0
  33. data/lib/common/TntInterface.rb +44 -0
  34. data/lib/common/Tree.rb +61 -0
  35. data/lib/common/TreetaggerInterface.rb +303 -0
  36. data/lib/common/headz.rb +338 -0
  37. data/lib/common/option_parser.rb +13 -0
  38. data/lib/common/ruby_class_extensions.rb +310 -0
  39. data/lib/fred/Baseline.rb +150 -0
  40. data/lib/fred/FileZipped.rb +31 -0
  41. data/lib/fred/FredBOWContext.rb +863 -0
  42. data/lib/fred/FredConfigData.rb +182 -0
  43. data/lib/fred/FredConventions.rb +232 -0
  44. data/lib/fred/FredDetermineTargets.rb +324 -0
  45. data/lib/fred/FredEval.rb +312 -0
  46. data/lib/fred/FredFeatureExtractors.rb +321 -0
  47. data/lib/fred/FredFeatures.rb +1061 -0
  48. data/lib/fred/FredFeaturize.rb +596 -0
  49. data/lib/fred/FredNumTrainingSenses.rb +27 -0
  50. data/lib/fred/FredParameters.rb +402 -0
  51. data/lib/fred/FredSplit.rb +84 -0
  52. data/lib/fred/FredSplitPkg.rb +180 -0
  53. data/lib/fred/FredTest.rb +607 -0
  54. data/lib/fred/FredTrain.rb +144 -0
  55. data/lib/fred/PlotAndREval.rb +480 -0
  56. data/lib/fred/fred.rb +45 -0
  57. data/lib/fred/md5.rb +23 -0
  58. data/lib/fred/opt_parser.rb +250 -0
  59. data/lib/frprep/AbstractSynInterface.rb +1227 -0
  60. data/lib/frprep/Ampersand.rb +37 -0
  61. data/lib/frprep/BerkeleyInterface.rb +375 -0
  62. data/lib/frprep/CollinsInterface.rb +1165 -0
  63. data/lib/frprep/ConfigData.rb +694 -0
  64. data/lib/frprep/Counter.rb +18 -0
  65. data/lib/frprep/FNCorpusXML.rb +643 -0
  66. data/lib/frprep/FNDatabase.rb +144 -0
  67. data/lib/frprep/FixSynSemMapping.rb +196 -0
  68. data/lib/frprep/FrPrepConfigData.rb +66 -0
  69. data/lib/frprep/FrameXML.rb +513 -0
  70. data/lib/frprep/FrprepHelper.rb +1324 -0
  71. data/lib/frprep/Graph.rb +345 -0
  72. data/lib/frprep/ISO-8859-1.rb +24 -0
  73. data/lib/frprep/MiniparInterface.rb +1388 -0
  74. data/lib/frprep/Parser.rb +213 -0
  75. data/lib/frprep/RegXML.rb +269 -0
  76. data/lib/frprep/STXmlTerminalOrder.rb +194 -0
  77. data/lib/frprep/SalsaTigerRegXML.rb +2347 -0
  78. data/lib/frprep/SalsaTigerXMLHelper.rb +99 -0
  79. data/lib/frprep/SleepyInterface.rb +384 -0
  80. data/lib/frprep/SynInterfaces.rb +275 -0
  81. data/lib/frprep/TabFormat.rb +720 -0
  82. data/lib/frprep/Tiger.rb +1448 -0
  83. data/lib/frprep/TntInterface.rb +44 -0
  84. data/lib/frprep/Tree.rb +61 -0
  85. data/lib/frprep/TreetaggerInterface.rb +303 -0
  86. data/lib/frprep/do_parses.rb +142 -0
  87. data/lib/frprep/frprep.rb +686 -0
  88. data/lib/frprep/headz.rb +338 -0
  89. data/lib/frprep/one_parsed_file.rb +28 -0
  90. data/lib/frprep/opt_parser.rb +94 -0
  91. data/lib/frprep/ruby_class_extensions.rb +310 -0
  92. data/lib/rosy/AbstractFeatureAndExternal.rb +240 -0
  93. data/lib/rosy/DBMySQL.rb +146 -0
  94. data/lib/rosy/DBSQLite.rb +280 -0
  95. data/lib/rosy/DBTable.rb +239 -0
  96. data/lib/rosy/DBWrapper.rb +176 -0
  97. data/lib/rosy/ExternalConfigData.rb +58 -0
  98. data/lib/rosy/FailedParses.rb +130 -0
  99. data/lib/rosy/FeatureInfo.rb +242 -0
  100. data/lib/rosy/GfInduce.rb +1115 -0
  101. data/lib/rosy/GfInduceFeature.rb +148 -0
  102. data/lib/rosy/InputData.rb +294 -0
  103. data/lib/rosy/RosyConfigData.rb +115 -0
  104. data/lib/rosy/RosyConfusability.rb +338 -0
  105. data/lib/rosy/RosyEval.rb +465 -0
  106. data/lib/rosy/RosyFeatureExtractors.rb +1609 -0
  107. data/lib/rosy/RosyFeaturize.rb +280 -0
  108. data/lib/rosy/RosyInspect.rb +336 -0
  109. data/lib/rosy/RosyIterator.rb +477 -0
  110. data/lib/rosy/RosyPhase2FeatureExtractors.rb +230 -0
  111. data/lib/rosy/RosyPruning.rb +165 -0
  112. data/lib/rosy/RosyServices.rb +744 -0
  113. data/lib/rosy/RosySplit.rb +232 -0
  114. data/lib/rosy/RosyTask.rb +19 -0
  115. data/lib/rosy/RosyTest.rb +826 -0
  116. data/lib/rosy/RosyTrain.rb +232 -0
  117. data/lib/rosy/RosyTrainingTestTable.rb +786 -0
  118. data/lib/rosy/TargetsMostFrequentFrame.rb +60 -0
  119. data/lib/rosy/View.rb +418 -0
  120. data/lib/rosy/opt_parser.rb +379 -0
  121. data/lib/rosy/rosy.rb +77 -0
  122. data/lib/shalmaneser/version.rb +3 -0
  123. data/test/frprep/test_opt_parser.rb +94 -0
  124. data/test/functional/functional_test_helper.rb +40 -0
  125. data/test/functional/sample_experiment_files/fred_test.salsa.erb +122 -0
  126. data/test/functional/sample_experiment_files/fred_train.salsa.erb +135 -0
  127. data/test/functional/sample_experiment_files/prp_test.salsa.erb +138 -0
  128. data/test/functional/sample_experiment_files/prp_test.salsa.fred.standalone.erb +120 -0
  129. data/test/functional/sample_experiment_files/prp_test.salsa.rosy.standalone.erb +120 -0
  130. data/test/functional/sample_experiment_files/prp_train.salsa.erb +138 -0
  131. data/test/functional/sample_experiment_files/prp_train.salsa.fred.standalone.erb +138 -0
  132. data/test/functional/sample_experiment_files/prp_train.salsa.rosy.standalone.erb +138 -0
  133. data/test/functional/sample_experiment_files/rosy_test.salsa.erb +257 -0
  134. data/test/functional/sample_experiment_files/rosy_train.salsa.erb +259 -0
  135. data/test/functional/test_fred.rb +47 -0
  136. data/test/functional/test_frprep.rb +52 -0
  137. data/test/functional/test_rosy.rb +20 -0
  138. metadata +270 -0
@@ -0,0 +1,27 @@
1
+ require "fred/FredFeatures"
2
+
3
# Determine which senses of a lemma are available as training senses.
#
# lemma:: the lemma to look up
# exp:: experiment configuration object; only handed on to AnswerKeyAccess
# lemmas_and_senses_obj:: object answering get_senses(lemma); consulted only
#                         when no split is in use
# split_id:: ID of the current training/test split, or nil/false if
#            separate test data is used
#
# Returns an array of senses; the array is empty if none are known.
def determine_training_senses(lemma, exp, lemmas_and_senses_obj, split_id)
  unless split_id
    # We are using separate test data, so we can just look up the
    # training senses in the lemmas_and_senses object.
    return lemmas_and_senses_obj.get_senses(lemma) || []
  end

  # The dataset is split into random training and test portions, so we
  # actually have to look into the training part of the data to see
  # which senses occur there.
  seen_senses = {}
  reader = AnswerKeyAccess.new(exp, "train", lemma, "r", split_id, "train")

  # Only the gold senses are of interest. The remaining block parameters
  # are underscore-prefixed so that none of them collides with the
  # method parameter `lemma`.
  reader.each do |_lemma, _pos, _ids, _sids, gold_senses, _transformed_gold_senses|
    gold_senses.each { |sense| seen_senses[sense] = true }
  end

  seen_senses.keys
end
@@ -0,0 +1,402 @@
1
+ # FredParameters
2
+ # Katrin Erk, April 05
3
+ #
4
+ # Frame disambiguation system:
5
+ # test different values for system parameters,
6
+ # construct text and graphical output
7
+
8
+ # Salsa packages
9
+ require "PlotAndREval"
10
+
11
+ # Fred packages
12
+ require "FredConfigData"
13
+ require "FredConventions"
14
+ require "FredSplit"
15
+ require "FredTrain"
16
+ require "FredTest"
17
+ require "FredEval"
18
+
19
+ ##########################################
20
+
21
################
# SlideVar:
# keeps a single sliding variable,
# has an iterator that sets each value of the slide in the experiment
# file data structure and yields it together with a descriptive text.
#
# Initialization with the value of a --slide command line parameter.
# Valid forms:
#
# feature=<f>:<what>:<start>-<end>:<slide>
#   with f in { context, ngram, syn, grfunc, fe }
#   what in { weight, dist } (dist only available for context)
#   start, end, slide floats represented as strings
#
# <var>:<start>-<end>:<slide>
#   with var in { smoothing_lambda, window_size }
#
# The empty string is accepted as well and stands for "no slide variable".
class SlideVar
  attr_reader :var_name

  # Slack added when counting how many steps fit between start and end
  # value, so that floating point error does not drop the end value.
  STEP_TOLERANCE = 1e-9

  # Number of decimal digits that float slide values are rounded to, so
  # that 0.7 + 2 * 0.05 is reported as 0.8 rather than 0.7999999999999999.
  VALUE_PRECISION = 10

  # string: value of the --slide parameter (may be "")
  # exp: FredConfigData object; queried with get_lf() and get_type()
  #
  # Raises a RuntimeError if the argument cannot be parsed, names a
  # variable that cannot be slid, or has a step size that is not positive.
  def initialize(string, # value of --slide parameter
                 exp)    # FredConfigData object

    # keep start and end value and step size for the sliding
    @startval = @endval = @step = @current = 0.0

    # setting experiment file values for each step of the sliding:
    # remember lhs and rhs of what needs to be set.
    # rhs contains a string REPLACEME to be replaced by the current value
    @exp_lhs = ""
    @exp_rhs = ""
    @var_name = ""
    @remove_list_variable_regexp = nil # set non-nil if we need unset_list_entry()

    # empty slide variable
    return if string == ""

    if string =~ /^feature=(\w+):(\w+):([\d\.]+)-([\d\.]+):([\d\.]+)$/
      # --slide feature=ngram:weight:0.8-4.0:0.3
      # --slide feature=context:dist:0.7-0.9:0.05
      init_feature_slide(exp, $1, $2, $3.to_f, $4.to_f, $5.to_f)

    elsif string =~ /^(\w+):([\d\.]+)-([\d\.]+):([\d\.]+)$/
      # --slide window_size:0-4:1
      # --slide smoothing_lambda:0.3-0.9:0.05
      init_plain_slide(exp, $1, $2, $3, $4)

    else
      # not a valid argument to --slide
      raise "Sorry, could not parse argument of --slide. \nI got: "+ string
    end

    # a step of zero would make each_slide_value() never terminate
    unless @step > 0
      raise "Error in argument of --slide: step size must be greater than 0, what I got is #{@step}"
    end
  end

  ####
  # iterate through each value of the slide variable (if there is a slide variable)
  # and set it in the experiment file data structure
  #
  # yields pairs [current value, descriptive text of the current setting];
  # for an empty slide variable, yields [0, ""] exactly once
  def each_slide_value(exp) # FredConfigData object
    if empty?
      # no slide variable
      yield [0, ""]
      return
    end

    # The values are computed as start + i * step rather than by repeated
    # addition, so that floating point error does not accumulate and
    # cut off the end value.
    last_step = ((@endval - @startval) / @step.to_f + STEP_TOLERANCE).floor

    0.upto(last_step) do |step_no|
      @current = @startval + step_no * @step
      # integer slides stay integers; only floats need cleaning up
      @current = @current.round(VALUE_PRECISION) if @current.is_a?(Float)

      if @remove_list_variable_regexp
        # we have a list feature that we first need to unset before setting it
        exp.unset_list_entry(@exp_lhs, @remove_list_variable_regexp)
      end
      exp.set_entry(@exp_lhs, @exp_rhs.sub(/REPLACEME/, @current.to_s))

      yield [@current, @var_name + "=" + @current.to_s]
    end
  end

  # true if this object was initialized with the empty string,
  # i.e. there is nothing to slide
  def empty?
    return @exp_lhs.empty?
  end

  private

  # set up sliding of the weight or distance parameter of a feature
  def init_feature_slide(exp, featurename, parname, startval, endval, step)
    @startval = startval
    @endval = endval
    @step = step
    @exp_lhs = "feature"

    if featurename == "context"
      # both weight and dist possible; the parameter that is not slid
      # keeps the value it has in the experiment file
      case parname
      when "weight"
        @exp_rhs = "#{featurename} REPLACEME #{exp.get_lf("feature", "context", "wtdist")}"
      when "dist"
        @exp_rhs = "#{featurename} #{exp.get_lf("feature", "context", "weight")} REPLACEME"
      else
        raise "Error in argument of --slide: I found a value of neither 'weight' nor 'dist': "+ parname
      end

      @exp_rhs << " mwedist" if exp.get_lf("feature", "context", "mwedist")

    else
      # feature name not "context": only weight possible
      unless parname == "weight"
        raise "Error in argument of --slide: can only do 'weight', what I got is "+ parname
      end

      @exp_rhs = "#{featurename} REPLACEME"
    end

    @var_name = "feature #{featurename} #{parname}"
    # A regexp literal is used on purpose: inside a double-quoted string
    # "\s" is a plain space, not the whitespace character class.
    @remove_list_variable_regexp = /^#{Regexp.escape(featurename)}\s/
  end

  # set up sliding of a plain integer- or float-valued variable;
  # startval, endval and step are still strings here because their
  # conversion depends on the type of the variable
  def init_plain_slide(exp, varname, startval, endval, step)
    case exp.get_type(varname)
    when "integer"
      @startval = startval.to_i
      @endval = endval.to_i
      @step = step.to_i
    when "float"
      @startval = startval.to_f
      @endval = endval.to_f
      @step = step.to_f
    else
      raise "Unslidable variable "+ varname
    end

    @exp_lhs = varname
    @exp_rhs = "REPLACEME"
    @var_name = varname
  end
end
164
+
165
################
# ToggleVar:
# keeps a single toggle variable,
# and has a method that sets this toggle variable to a given value
# in the experiment file data structure.
#
# Two kinds of toggle variables exist:
# - feature_dim=<dim>, with dim in { word, lemma, pos, ne }:
#   toggling to true sets the list entry, toggling to false un-sets it
# - any other name that the experiment file declares as of type "bool":
#   the variable is set to "true" or "false"
class ToggleVar
  attr_reader :var_name

  # feature dimensions that can be switched on and off
  FEATURE_DIMENSIONS = ["word", "lemma", "pos", "ne"].freeze

  # variables that must not be toggled, whatever their type
  FEATURIZATION_TIME_VARIABLES = ["use_fn_gf", "window_size"].freeze

  # string: one toggle variable, i.e. one part of the value of the
  #         --toggle parameter after it has been split at ":"
  # exp: FredConfigData object; queried with get_type()
  #
  # Raises a RuntimeError for unknown feature dimensions, for variables
  # that cannot be toggled, and for variables that are not boolean.
  def initialize(string, # part of value of --toggle parameter, which has been split at :
                 exp)    # FredConfigData object

    if string =~ /^feature_dim=(\w+)$/
      # feature dimension

      @exp_lhs = "feature_dim"
      @exp_rhs = $1
      @unset_at_false = true # for false, un-set list valued parameter in set_value_to()
      @var_name = "feature_dim #{@exp_rhs}"

      unless FEATURE_DIMENSIONS.include? @exp_rhs
        raise "Unknown feature dimension "+ @exp_rhs
      end

    else
      # normal variable

      # This check has to come before the type check: otherwise a listed
      # variable that is not of type "bool" is rejected by the type check
      # with the generic message and the explanation below is never shown.
      if FEATURIZATION_TIME_VARIABLES.include? string
        raise "Sorry, cannot toggle #{string}, since this variable takes its effect during featurization."
      end

      unless exp.get_type(string) == "bool"
        raise "Unknown value in --toggle: "+ string
      end

      @exp_lhs = string
      @exp_rhs = "REPLACEME"
      @unset_at_false = false # for false, set parameter to false in set_value_to
      @var_name = @exp_lhs
    end
  end

  ###
  # set the value of my toggle variable to the given boolean
  # in the given experiment file data structure.
  #
  # returns a descriptive text of the current setting
  def set_value_to(boolean, # true, false
                   exp)     # FredConfigData object

    if @unset_at_false && !boolean
      exp.unset_list_entry(@exp_lhs, @exp_rhs)
    else
      # for feature dimensions the rhs contains no REPLACEME,
      # so it is passed on unchanged
      exp.set_entry(@exp_lhs, @exp_rhs.sub(/REPLACEME/, boolean.to_s))
    end

    return @var_name + "=" + boolean.to_s
  end

end
223
+
224
+
225
##########################################
# main class of this package:
# try out different values for system parameters,
# and record the result.
#
# One value can be a slide variable, taking on several numerical values.
# 0 or more values can be toggle variables, taking on the values true and false.
#
# Results are written to <output prefix>.txt, and, if a slide variable
# is used, plotted to <output prefix>.ps.
class FredParameters

  #####
  # evaluate runtime options and announce the task
  #
  # Recognized options:
  # --slide:: value handed to SlideVar.new
  # --toggle:: colon-separated list, each part handed to ToggleVar.new
  # --output_to:: prefix of the output files (default "fred_parameters")
  def initialize(exp_obj, # FredConfigData object
                 options) # hash: runtime option name (string) => value(string)

    in_enduser_mode_unavailable()
    @exp = exp_obj

    ##
    # evaluate runtime options:
    # record the slide variable (if any) plus all toggle variables
    @slide = SlideVar.new("", @exp)
    @toggle = Array.new
    @outfile_prefix = "fred_parameters"

    options.each_pair do |opt, arg|
      case opt
      when "--slide"
        @slide = SlideVar.new(arg, @exp)

      when "--toggle"
        arg.split(":").each { |toggle_var|
          @toggle << ToggleVar.new(toggle_var, @exp)
        }

      when "--output_to"
        @outfile_prefix = arg

      else
        # case of unknown arguments has been dealt with by fred.rb
      end
    end

    # announce the task
    $stderr.puts "---------"
    $stderr.puts "Fred parameter exploration, experiment #{@exp.get("experiment_ID")}"
    $stderr.puts "---------"
  end

  ####
  # run the parameter exploration:
  # split the training data, then train, test and evaluate once for each
  # combination of toggle values and slide value, and record the scores
  def compute()
    ##
    # the call is made only to find out whether it fails, which is
    # reported as missing featurized training data
    begin
      fred_dirname(@exp, "train", "features")
    rescue
      $stderr.puts "To experiment with system parameters, please first featurize training data."
      exit 1
    end

    ##
    # make new split ID from system time, and make a split with 80% training, 20% test data
    splitID = Time.new().to_f.to_s
    split_task = FredSplit.new(@exp,
                               { "--logID" => splitID,
                                 "--trainpercent" => "80",
                               },
                               true # ignore unambiguous
                               )
    split_task.compute()

    ##
    # start recording results: text output file
    begin
      textout_file = File.new(@outfile_prefix + ".txt", "w")
    rescue
      raise "Could not write to output file #{@outfile_prefix}.txt"
    end

    begin
      values_to_score, max_setting = explore_settings(splitID, textout_file)
    ensure
      # close the output file even if training or testing fails halfway
      textout_file.close()
    end

    ##
    # remove split
    FredSplit.remove_split(@exp, splitID)

    ##
    # plot outcome, report overall maximum
    report_results(values_to_score, max_setting)
  end

  private

  ###
  # try out each combination of toggle values and slide value,
  # writing one result line per combination to textout_file
  #
  # returns a pair:
  # - values_to_score: hash toggle_values_descr(string) =>
  #                    hash slide_value => score
  # - max_setting: string describing the setting with the maximum score,
  #                empty if no score was greater than 0
  def explore_settings(splitID,      # string: split ID
                       textout_file) # File object open for writing

    values_to_score = Hash.new()
    max_score = 0.0
    max_setting = ""

    ##
    # for each value of the toggle variables:
    # 'binary' encodes one combination, one bit per toggle variable
    0.upto(2**@toggle.length() - 1) do |binary|
      textout_line = apply_toggle_setting(binary)
      values_to_score[textout_line] = Hash.new()

      ##
      # for each value of the slide variable
      @slide.each_slide_value(@exp) do |slide_value, slide_value_description|
        ##
        # progress bar
        $stderr.puts "Parameter exploration: #{textout_line} #{slide_value_description}"

        score = evaluate_current_setting(splitID)
        result_line = textout_line + slide_value_description + " : " + score.to_s

        # flush right away, so that results obtained so far survive a crash
        textout_file.puts result_line
        textout_file.flush()
        values_to_score[textout_line][slide_value] = score

        if score > max_score
          max_score = score
          max_setting = result_line
        end
      end
    end

    [values_to_score, max_setting]
  end

  ###
  # re-set toggle values according to 'binary':
  # if the i-th bit is set in binary, set the i-th toggle variable
  # to true, else set it to false
  #
  # returns a descriptive text of the toggle values
  def apply_toggle_setting(binary) # integer: bit vector of toggle values
    descriptions = (0...@toggle.length).map do |i|
      bit_is_set = (binary & (2**i)) > 0
      @toggle[i].set_value_to(bit_is_set, @exp) + " "
    end
    descriptions.join
  end

  ###
  # @exp has been modified to fit the current values of the
  # slide and toggle variables.
  # Now train, test, evaluate on the split we have constructed
  #
  # returns the F-score read off the evaluation object
  def evaluate_current_setting(splitID) # string: split ID
    FredTrain.new(@exp, { "--logID" => splitID }).compute()
    FredTest.new(@exp,
                 { "--logID" => splitID,
                   "--nooutput" => true
                 }).compute()
    eval_task = FredEval.new(@exp, { "--logID" => splitID })
    eval_task.compute(false) # don't print evaluation results to file

    eval_task.f
  end

  ###
  # make gnuplot output (only if some slide variable has been used),
  # and tell the user where the output is and what the maximum was
  def report_results(values_to_score, # hash as returned by explore_settings
                     max_setting)     # string as returned by explore_settings

    unless @slide.empty?
      # joining all names at once avoids a dangling ", " at the end of
      # the title when there are no toggle variables
      explored_names = [@slide.var_name] + @toggle.map { |toggle_obj| toggle_obj.var_name }
      PlotAndREval.gnuplot_direct(values_to_score,
                                  "Exploring " + explored_names.join(", "),
                                  @slide.var_name,
                                  "F-score",
                                  @outfile_prefix + ".ps")
    end

    $stderr.puts "Parameter exploration finished."
    $stderr.puts "Text output was written to #{@outfile_prefix}.txt"
    unless @slide.empty?
      $stderr.puts "Gnuplot output was written to #{@outfile_prefix}.ps"
    end

    unless max_setting.empty?
      $stderr.puts "-----------------------"
      $stderr.puts "Maximum score:"
      $stderr.puts max_setting
    end
  end

end
402
+
@@ -0,0 +1,84 @@
1
+ # FredSplit
2
+ # Katrin Erk April 05
3
+ #
4
+ # Frame disambiguation system:
5
+ # make random split of the training data
6
+ #
7
+ # The split is computed on the basis of the Fred format
8
+ # feature data.
9
+ # The split is recorded in a separate split directory
10
+ # with a very simple system:
11
+ # - one file per feature file, same filename
12
+ # - one line per instance line in feature file
13
+ # - entry in that line is either 'train' or 'test'
14
+
15
+ # Fred packages
16
+ require "fred/FredSplitPkg"
17
+
18
# Task object that makes a random split of the training data into a
# training and a test portion. The actual work is delegated to a
# FredSplitPkg object.
class FredSplit

  ###
  # new
  #
  # evaluate runtime options and announce the task
  #
  # Recognized options:
  # --logID:: ID of the split (required)
  # --trainpercent:: percentage of the data to use as training data;
  #                  90 if not given
  #
  # Raises a RuntimeError if --logID is missing or the training
  # percentage is not greater than 0 and smaller than 100.
  def initialize(exp_obj, # FredConfigData object
                 options, # hash: runtime option name (string) => value(string)
                 ignore_unambiguous = false)

    in_enduser_mode_unavailable()

    @exp = exp_obj
    @ignore_unambiguous = ignore_unambiguous

    # evaluate runtime options
    @split_id = nil
    @trainpercent = 0.9 # stored as a fraction, not as a percentage

    options.each_pair { |opt, arg|
      case opt
      when "--logID"
        @split_id = arg

      when "--trainpercent"
        @trainpercent = arg.to_f / 100.0

      else
        # case of unknown arguments has been dealt with by fred.rb
      end
    }

    # sanity check: need a log ID
    if @split_id.nil?
      raise "I need a log ID, parameter --logID"
    end
    if @trainpercent <= 0.0 || @trainpercent >= 1.0
      raise "Training percentage needs to be between 1 and 99. I got "+
        trainpercent_text()
    end

    ##
    # make a splitting object
    @split_obj = FredSplitPkg.new(@exp)

    # announce the task
    $stderr.puts "---------"
    $stderr.puts "Fred experiment #{@exp.get("experiment_ID")}: Making split, using " + trainpercent_text() + "% as training data."
    $stderr.puts "---------"
  end

  ###
  # remove the split with the given ID (delegates to FredSplitPkg)
  def FredSplit.remove_split(exp,     # FredConfigData object
                             splitID) # string: split ID

    FredSplitPkg.remove_split(exp, splitID)
  end

  ###
  # compute
  #
  # do the splitting: any existing split with the same ID is removed first
  def compute()
    FredSplit.remove_split(@exp, @split_id)
    @split_obj.make_new_split(@split_id, @trainpercent,
                              @ignore_unambiguous)
  end

  private

  ###
  # the training percentage as a string for messages.
  #
  # The value is rounded rather than truncated: 29 / 100.0 * 100.0 is
  # 28.999999999999996, which truncation would report as 28.
  # Whole percentages are printed without a decimal point.
  def trainpercent_text()
    percent = (@trainpercent * 100.0).round(2)
    if percent == percent.to_i
      percent.to_i.to_s
    else
      percent.to_s
    end
  end
end