frprep 0.0.1.prealpha

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (138) hide show
  1. data/.yardopts +8 -0
  2. data/CHANGELOG.rdoc +0 -0
  3. data/LICENSE.rdoc +0 -0
  4. data/README.rdoc +0 -0
  5. data/lib/common/AbstractSynInterface.rb +1227 -0
  6. data/lib/common/BerkeleyInterface.rb +375 -0
  7. data/lib/common/CollinsInterface.rb +1165 -0
  8. data/lib/common/ConfigData.rb +694 -0
  9. data/lib/common/Counter.rb +18 -0
  10. data/lib/common/DBInterface.rb +48 -0
  11. data/lib/common/EnduserMode.rb +27 -0
  12. data/lib/common/Eval.rb +480 -0
  13. data/lib/common/FixSynSemMapping.rb +196 -0
  14. data/lib/common/FrPrepConfigData.rb +66 -0
  15. data/lib/common/FrprepHelper.rb +1324 -0
  16. data/lib/common/Graph.rb +345 -0
  17. data/lib/common/ISO-8859-1.rb +24 -0
  18. data/lib/common/ML.rb +186 -0
  19. data/lib/common/Maxent.rb +215 -0
  20. data/lib/common/MiniparInterface.rb +1388 -0
  21. data/lib/common/Optimise.rb +195 -0
  22. data/lib/common/Parser.rb +213 -0
  23. data/lib/common/RegXML.rb +269 -0
  24. data/lib/common/RosyConventions.rb +171 -0
  25. data/lib/common/SQLQuery.rb +243 -0
  26. data/lib/common/STXmlTerminalOrder.rb +194 -0
  27. data/lib/common/SalsaTigerRegXML.rb +2347 -0
  28. data/lib/common/SalsaTigerXMLHelper.rb +99 -0
  29. data/lib/common/SleepyInterface.rb +384 -0
  30. data/lib/common/SynInterfaces.rb +275 -0
  31. data/lib/common/TabFormat.rb +720 -0
  32. data/lib/common/Tiger.rb +1448 -0
  33. data/lib/common/TntInterface.rb +44 -0
  34. data/lib/common/Tree.rb +61 -0
  35. data/lib/common/TreetaggerInterface.rb +303 -0
  36. data/lib/common/headz.rb +338 -0
  37. data/lib/common/option_parser.rb +13 -0
  38. data/lib/common/ruby_class_extensions.rb +310 -0
  39. data/lib/fred/Baseline.rb +150 -0
  40. data/lib/fred/FileZipped.rb +31 -0
  41. data/lib/fred/FredBOWContext.rb +863 -0
  42. data/lib/fred/FredConfigData.rb +182 -0
  43. data/lib/fred/FredConventions.rb +232 -0
  44. data/lib/fred/FredDetermineTargets.rb +324 -0
  45. data/lib/fred/FredEval.rb +312 -0
  46. data/lib/fred/FredFeatureExtractors.rb +321 -0
  47. data/lib/fred/FredFeatures.rb +1061 -0
  48. data/lib/fred/FredFeaturize.rb +596 -0
  49. data/lib/fred/FredNumTrainingSenses.rb +27 -0
  50. data/lib/fred/FredParameters.rb +402 -0
  51. data/lib/fred/FredSplit.rb +84 -0
  52. data/lib/fred/FredSplitPkg.rb +180 -0
  53. data/lib/fred/FredTest.rb +607 -0
  54. data/lib/fred/FredTrain.rb +144 -0
  55. data/lib/fred/PlotAndREval.rb +480 -0
  56. data/lib/fred/fred.rb +45 -0
  57. data/lib/fred/md5.rb +23 -0
  58. data/lib/fred/opt_parser.rb +250 -0
  59. data/lib/frprep/AbstractSynInterface.rb +1227 -0
  60. data/lib/frprep/Ampersand.rb +37 -0
  61. data/lib/frprep/BerkeleyInterface.rb +375 -0
  62. data/lib/frprep/CollinsInterface.rb +1165 -0
  63. data/lib/frprep/ConfigData.rb +694 -0
  64. data/lib/frprep/Counter.rb +18 -0
  65. data/lib/frprep/FNCorpusXML.rb +643 -0
  66. data/lib/frprep/FNDatabase.rb +144 -0
  67. data/lib/frprep/FixSynSemMapping.rb +196 -0
  68. data/lib/frprep/FrPrepConfigData.rb +66 -0
  69. data/lib/frprep/FrameXML.rb +513 -0
  70. data/lib/frprep/FrprepHelper.rb +1324 -0
  71. data/lib/frprep/Graph.rb +345 -0
  72. data/lib/frprep/ISO-8859-1.rb +24 -0
  73. data/lib/frprep/MiniparInterface.rb +1388 -0
  74. data/lib/frprep/Parser.rb +213 -0
  75. data/lib/frprep/RegXML.rb +269 -0
  76. data/lib/frprep/STXmlTerminalOrder.rb +194 -0
  77. data/lib/frprep/SalsaTigerRegXML.rb +2347 -0
  78. data/lib/frprep/SalsaTigerXMLHelper.rb +99 -0
  79. data/lib/frprep/SleepyInterface.rb +384 -0
  80. data/lib/frprep/SynInterfaces.rb +275 -0
  81. data/lib/frprep/TabFormat.rb +720 -0
  82. data/lib/frprep/Tiger.rb +1448 -0
  83. data/lib/frprep/TntInterface.rb +44 -0
  84. data/lib/frprep/Tree.rb +61 -0
  85. data/lib/frprep/TreetaggerInterface.rb +303 -0
  86. data/lib/frprep/do_parses.rb +142 -0
  87. data/lib/frprep/frprep.rb +686 -0
  88. data/lib/frprep/headz.rb +338 -0
  89. data/lib/frprep/one_parsed_file.rb +28 -0
  90. data/lib/frprep/opt_parser.rb +94 -0
  91. data/lib/frprep/ruby_class_extensions.rb +310 -0
  92. data/lib/rosy/AbstractFeatureAndExternal.rb +240 -0
  93. data/lib/rosy/DBMySQL.rb +146 -0
  94. data/lib/rosy/DBSQLite.rb +280 -0
  95. data/lib/rosy/DBTable.rb +239 -0
  96. data/lib/rosy/DBWrapper.rb +176 -0
  97. data/lib/rosy/ExternalConfigData.rb +58 -0
  98. data/lib/rosy/FailedParses.rb +130 -0
  99. data/lib/rosy/FeatureInfo.rb +242 -0
  100. data/lib/rosy/GfInduce.rb +1115 -0
  101. data/lib/rosy/GfInduceFeature.rb +148 -0
  102. data/lib/rosy/InputData.rb +294 -0
  103. data/lib/rosy/RosyConfigData.rb +115 -0
  104. data/lib/rosy/RosyConfusability.rb +338 -0
  105. data/lib/rosy/RosyEval.rb +465 -0
  106. data/lib/rosy/RosyFeatureExtractors.rb +1609 -0
  107. data/lib/rosy/RosyFeaturize.rb +280 -0
  108. data/lib/rosy/RosyInspect.rb +336 -0
  109. data/lib/rosy/RosyIterator.rb +477 -0
  110. data/lib/rosy/RosyPhase2FeatureExtractors.rb +230 -0
  111. data/lib/rosy/RosyPruning.rb +165 -0
  112. data/lib/rosy/RosyServices.rb +744 -0
  113. data/lib/rosy/RosySplit.rb +232 -0
  114. data/lib/rosy/RosyTask.rb +19 -0
  115. data/lib/rosy/RosyTest.rb +826 -0
  116. data/lib/rosy/RosyTrain.rb +232 -0
  117. data/lib/rosy/RosyTrainingTestTable.rb +786 -0
  118. data/lib/rosy/TargetsMostFrequentFrame.rb +60 -0
  119. data/lib/rosy/View.rb +418 -0
  120. data/lib/rosy/opt_parser.rb +379 -0
  121. data/lib/rosy/rosy.rb +77 -0
  122. data/lib/shalmaneser/version.rb +3 -0
  123. data/test/frprep/test_opt_parser.rb +94 -0
  124. data/test/functional/functional_test_helper.rb +40 -0
  125. data/test/functional/sample_experiment_files/fred_test.salsa.erb +122 -0
  126. data/test/functional/sample_experiment_files/fred_train.salsa.erb +135 -0
  127. data/test/functional/sample_experiment_files/prp_test.salsa.erb +138 -0
  128. data/test/functional/sample_experiment_files/prp_test.salsa.fred.standalone.erb +120 -0
  129. data/test/functional/sample_experiment_files/prp_test.salsa.rosy.standalone.erb +120 -0
  130. data/test/functional/sample_experiment_files/prp_train.salsa.erb +138 -0
  131. data/test/functional/sample_experiment_files/prp_train.salsa.fred.standalone.erb +138 -0
  132. data/test/functional/sample_experiment_files/prp_train.salsa.rosy.standalone.erb +138 -0
  133. data/test/functional/sample_experiment_files/rosy_test.salsa.erb +257 -0
  134. data/test/functional/sample_experiment_files/rosy_train.salsa.erb +259 -0
  135. data/test/functional/test_fred.rb +47 -0
  136. data/test/functional/test_frprep.rb +52 -0
  137. data/test/functional/test_rosy.rb +20 -0
  138. metadata +270 -0
@@ -0,0 +1,27 @@
1
+ require "fred/FredFeatures"
2
+
3
# Determine the senses that are available for a lemma in the training data.
#
# lemma:: lemma whose training senses are wanted
# exp:: experiment object, handed on unchanged to AnswerKeyAccess.new
# lemmas_and_senses_obj:: object responding to get_senses(lemma)
# split_id:: split ID; nil/false if separate test data is used
#
# Returns an array of senses, each sense listed once; [] if none are known.
def determine_training_senses(lemma, exp, lemmas_and_senses_obj, split_id)
  unless split_id
    # we're using separate test data,
    # so we can just look up the training senses
    # in the lemmas_and_senses object
    return lemmas_and_senses_obj.get_senses(lemma) || []
  end

  # we're splitting the dataset into random training and test portions.
  # this means that we actually have to look into the training part
  # of the data to determine the training senses
  seen_senses = {}

  reader = AnswerKeyAccess.new(exp, "train", lemma, "r", split_id, "train")
  # only the gold senses are of interest here. The remaining block
  # parameters are underscore-prefixed so that none of them shadows
  # (or, in Ruby 1.8, overwrites) the method parameter 'lemma'.
  reader.each do |_lemma, _pos, _ids, _sids, gold_senses, _transformed_gold_senses|
    gold_senses.each { |sense| seen_senses[sense] = true }
  end

  seen_senses.keys
end
@@ -0,0 +1,402 @@
1
+ # FredParameters
2
+ # Katrin Erk, April 05
3
+ #
4
+ # Frame disambiguation system:
5
+ # test different values for system parameters,
6
+ # construct text and graphical output
7
+
8
+ # Salsa packages
9
+ require "PlotAndREval"
10
+
11
+ # Fred packages
12
+ require "FredConfigData"
13
+ require "FredConventions"
14
+ require "FredSplit"
15
+ require "FredTrain"
16
+ require "FredTest"
17
+ require "FredEval"
18
+
19
+ ##########################################
20
+
21
+ ################
22
+ # SlideVar:
23
+ # keeps a single sliding variable,
24
+ # has an iterator that yields each value of the slide as a pair
25
+ # [lhs, rhs] to be passed on to FredConfigData.set_entry()
26
+ #
27
+ # Initialization with the value of a --slide command line parameter.
28
+ # Valid forms:
29
+ #
30
+ # feature=<f>:<what>:<start>-<end>:<slide>
31
+ # with f in { context, ngram, syn, grfunc, fe }
32
+ # what in { weight, dist } (dist only available for context)
33
+ # start, end, slide floats represented as strings
34
+ #
35
+ # <var>:<start>-<end>:<slide>
36
+ # with var in { smoothing_lambda, window_size }
37
class SlideVar
  # descriptive name of the slide variable ("" for the empty slide variable)
  attr_reader :var_name

  # slack added when counting the slide steps, so that float representation
  # error in (end - start) / step cannot drop the end value of the slide
  STEP_TOLERANCE = 1e-9

  # float slide values are rounded to this many decimals, so that
  # 0.7 + 2 * 0.05 is reported as 0.8 rather than 0.7999999999999999
  FLOAT_DIGITS = 10

  ###
  # new
  #
  # parse the value of the --slide parameter.
  # An empty string yields the empty slide variable.
  #
  # raises a RuntimeError if the string cannot be parsed, names a
  # variable that cannot be slid, or has a step size that is not positive
  def initialize(string, # value of --slide parameter
                 exp)    # FredConfigData object

    # keep start and end value and step size for the sliding
    @startval = @endval = @step = @current = 0.0

    # setting experiment file values for each step of the sliding:
    # remember lhs and rhs of what needs to be set.
    # rhs contains a string REPLACEME to be replaced by the current value
    @exp_lhs = ""
    @exp_rhs = ""
    @var_name = ""
    @remove_list_variable_regexp = nil # set non-nil if we need unset_list_entry()

    if string == ""
      # empty slide variable
      return
    end

    if string =~ /^feature=(\w+):(\w+):([\d\.]+)-([\d\.]+):([\d\.]+)$/
      # --slide feature=ngram:weight:0.8-4.0:0.3
      # --slide feature=context:dist:0.7-0.9:0.05

      featurename = $1
      parname = $2
      @startval = $3.to_f
      @endval = $4.to_f
      @step = $5.to_f

      @exp_lhs = "feature"

      if featurename == "context"
        # both weight and dist possible

        case parname
        when "weight"
          @exp_rhs = "#{featurename} REPLACEME #{exp.get_lf("feature", "context", "wtdist")}"
        when "dist"
          @exp_rhs = "#{featurename} #{exp.get_lf("feature", "context", "weight")} REPLACEME"
        else
          raise "Error in argument of --slide: I found a value of neither 'weight' nor 'dist': "+ parname
        end

        if exp.get_lf("feature", "context", "mwedist")
          @exp_rhs << " mwedist"
        end

      else
        # feature name not "context": only weight possible
        unless parname == "weight"
          raise "Error in argument of --slide: can only do 'weight', what I got is "+ parname
        end

        @exp_rhs = "#{featurename} REPLACEME"
      end

      @var_name = "feature #{featurename} #{parname}"
      # regexp literal rather than Regexp.new("...\s"): within a double-quoted
      # string, \s is a plain space and not the whitespace character class
      @remove_list_variable_regexp = /^#{Regexp.escape(featurename)}\s/

    elsif string =~ /^(\w+):([\d\.]+)-([\d\.]+):([\d\.]+)$/
      # --slide window_size:0-4:1
      # --slide smoothing_lambda:0.3-0.9:0.05

      featurename = $1
      start_string = $2
      end_string = $3
      step_string = $4

      case exp.get_type(featurename)
      when "integer"
        @startval = start_string.to_i
        @endval = end_string.to_i
        @step = step_string.to_i
      when "float"
        @startval = start_string.to_f
        @endval = end_string.to_f
        @step = step_string.to_f
      else
        raise "Unslidable variable "+ featurename
      end

      @exp_lhs = featurename
      @exp_rhs = "REPLACEME"
      @var_name = featurename

    else
      # not a valid argument to --slide

      raise "Sorry, could not parse argument of --slide. \nI got: "+ string
    end

    # a step size of zero would make each_slide_value() run forever
    unless @step > 0
      raise "Error in argument of --slide: the step size needs to be greater than 0. \nI got: "+ string
    end
  end

  ####
  # iterate through each value of the slide variable (if there is a slide variable)
  # and set it in the experiment file data structure
  #
  # yields pairs [current value, descriptive text of the current setting];
  # for the empty slide variable, yields [0, ""] exactly once
  def each_slide_value(exp) # FredConfigData object

    if empty?
      # no slide variable
      yield [0, ""]
      return
    end

    # the slide variable is nonempty.
    # Compute each value from its step index instead of adding up the
    # step size over and over: accumulated float error could otherwise
    # push the last value beyond @endval, such that it would be skipped.
    last_index = ((@endval - @startval) / @step.to_f + STEP_TOLERANCE).floor

    0.upto(last_index) { |index|
      @current = @startval + index * @step
      if @current.is_a?(Float)
        @current = @current.round(FLOAT_DIGITS)
      end

      if @remove_list_variable_regexp
        # we have a list feature that we first need to unset before setting it
        exp.unset_list_entry(@exp_lhs, @remove_list_variable_regexp)
      end
      exp.set_entry(@exp_lhs, @exp_rhs.sub(/REPLACEME/, @current.to_s))

      yield [@current, @var_name + "=" + @current.to_s]
    }
  end

  ###
  # true if this is the empty slide variable (no --slide parameter given)
  def empty?
    return @exp_lhs.empty?
  end
end
164
+
165
+ ################
166
+ # ToggleVar:
167
+ # keeps a single toggle variable,
168
+ # and has a method that sets this toggle variable to a given value
169
+ # in the experiment file data structure.
170
class ToggleVar
  # descriptive name of the toggle variable
  attr_reader :var_name

  ###
  # new
  #
  # string is either of the form feature_dim=<dimension>,
  # or the name of a boolean experiment file variable.
  # Raises a RuntimeError for anything else.
  def initialize(string, # part of value of --toggle parameter, which has been split at :
                 exp)    # FredConfigData object

    if string =~ /^feature_dim=(\w+)$/
      # feature dimension: a list valued parameter.
      # For false, the entry is un-set in set_value_to()
      dimension = Regexp.last_match(1)

      unless %w[word lemma pos ne].include?(dimension)
        raise "Unknown feature dimension "+ dimension
      end

      @exp_lhs = "feature_dim"
      @exp_rhs = dimension
      @unset_at_false = true
      @var_name = "feature_dim #{dimension}"
      return
    end

    # normal variable: has to be boolean
    if exp.get_type(string) != "bool"
      raise "Unknown value in --toggle: "+ string
    end

    if %w[use_fn_gf window_size].include?(string)
      raise "Sorry, cannot toggle #{string}, since this variable takes its effect during featurization."
    end

    # For false, the parameter is set to false in set_value_to()
    @exp_lhs = string
    @exp_rhs = "REPLACEME"
    @unset_at_false = false
    @var_name = string
  end

  ###
  # set the value of my toggle variable to the given boolean
  # in the given experiment file data structure.
  #
  # returns a descriptive text of the current setting
  def set_value_to(boolean, # true, false
                   exp)     # FredConfigData object

    if boolean || !@unset_at_false
      # set the entry; REPLACEME (if present) becomes "true" or "false"
      exp.set_entry(@exp_lhs, @exp_rhs.sub(/REPLACEME/, boolean.to_s))
    else
      # list valued parameter that is switched off: remove its entry
      exp.unset_list_entry(@exp_lhs, @exp_rhs)
    end

    "#{@var_name}=#{boolean}"
  end
end
223
+
224
+
225
+ ##########################################
226
+ # main class of this package:
227
+ # try out different values for system parameters,
228
+ # and record the result.
229
+ #
230
+ # One value can be a slide variable, taking on several numerical values.
231
+ # 0 or more values can be toggle variables, taking on the values true and false.
232
class FredParameters

  #####
  # new
  #
  # evaluate runtime options:
  # record the slide variable (if any) plus all toggle variables
  # and the prefix of the output files, then announce the task
  def initialize(exp_obj, # FredConfigData object
                 options) # hash: runtime option name (string) => value(string)

    in_enduser_mode_unavailable()
    @exp = exp_obj

    @slide = SlideVar.new("", @exp)
    @toggle = Array.new
    @outfile_prefix = "fred_parameters"

    options.each_pair do |opt, arg|
      case opt
      when "--slide"
        @slide = SlideVar.new(arg, @exp)

      when "--toggle"
        # several toggle variables are separated by colons
        arg.split(":").each { |toggle_var|
          @toggle << ToggleVar.new(toggle_var, @exp)
        }

      when "--output_to"
        @outfile_prefix = arg

      else
        # case of unknown arguments has been dealt with by fred.rb
      end
    end

    # announce the task
    $stderr.puts "---------"
    $stderr.puts "Fred parameter exploration, experiment #{@exp.get("experiment_ID")}"
    $stderr.puts "---------"
  end

  ####
  # compute
  #
  # make an 80%/20% split of the training data, then train, test and
  # evaluate on it for each combination of toggle and slide values.
  # Each score is written to <outfile_prefix>.txt. If a slide variable
  # is in use, the scores are also plotted to <outfile_prefix>.ps.
  # The best setting is reported on stderr.
  #
  # raises a RuntimeError if the text output file cannot be opened
  def compute()
    ##
    # make a split of the training data.
    # fred_dirname() is only called for its check here:
    # if it raises, we ask the user to featurize first.
    begin
      fred_dirname(@exp, "train", "features")
    rescue
      $stderr.puts "To experiment with system parameters, please first featurize training data."
      exit 1
    end
    # make new split ID from system time, and make a split with 80% training, 20% test data
    splitID = Time.new().to_f.to_s
    split_task = FredSplit.new(@exp,
                               { "--logID" => splitID,
                                 "--trainpercent" => "80",
                               },
                               true # ignore unambiguous
                               )
    split_task.compute()

    ##
    # start recording results:

    # text output file
    begin
      textout_file = File.new(@outfile_prefix + ".txt", "w")
    rescue
      raise "Could not write to output file #{@outfile_prefix}.txt"
    end

    # values_to_score: hash toggle_values_descr(string) =>
    #    hash slide_value(float) => score(float)
    values_to_score = Hash.new()

    # max_score: float, describing maximum score achieved
    # max_setting: string, describing values for maximum score
    max_score = 0.0
    max_setting = ""

    begin
      ##
      # for each value of the toggle variables:
      # the bits of 'binary' encode one true/false value per toggle variable
      0.upto(2**@toggle.length() - 1) { |binary|

        textout_line = apply_toggle_values(binary)
        values_to_score[textout_line] = Hash.new()

        ##
        # for each value of the slide variable
        @slide.each_slide_value(@exp) { |slide_value, slide_value_description|

          ##
          # progress bar
          $stderr.puts "Parameter exploration: #{textout_line} #{slide_value_description}"

          ##
          # @exp has been modified to fit the current values of the
          # slide and toggle variables.
          # Now train, test, evaluate on the split we have constructed,
          # and record the resulting F-score
          score = evaluate_on_split(splitID)
          result_line = textout_line + slide_value_description + " : " + score.to_s

          textout_file.puts result_line
          textout_file.flush()
          values_to_score[textout_line][slide_value] = score

          if score > max_score
            max_score = score
            max_setting = result_line
          end
        }
      }
    ensure
      # do not leak the file handle, whether or not an experiment failed
      textout_file.close()
    end

    ##
    # remove split
    FredSplit.remove_split(@exp, splitID)

    ##
    # plot outcome, report overall maximum
    report_outcome(values_to_score, max_setting)
  end

  ###############
  private

  ###
  # set each toggle variable in @exp according to the bits of 'binary':
  # if the i-th bit is set, the i-th toggle variable is set to true,
  # else to false.
  #
  # returns: string, concatenation of the descriptions that the toggle
  # variables return, each followed by one space
  def apply_toggle_values(binary) # integer
    descriptions = Array.new
    @toggle.each_with_index { |toggle_obj, index|
      descriptions << toggle_obj.set_value_to(binary[index] == 1, @exp) + " "
    }
    return descriptions.join
  end

  ###
  # train, test and evaluate with the current settings in @exp,
  # on the split with the given ID.
  #
  # returns: the value of FredEval#f after evaluation
  def evaluate_on_split(split_id) # string: split ID
    FredTrain.new(@exp, { "--logID" => split_id }).compute()
    FredTest.new(@exp,
                 { "--logID" => split_id,
                   "--nooutput"=> true
                 }).compute()
    eval_task = FredEval.new(@exp, { "--logID" => split_id })
    eval_task.compute(false) # don't print evaluation results to file

    return eval_task.f
  end

  ###
  # plot the scores (only if a slide variable has been used)
  # and report on stderr where output went and what the best setting was
  def report_outcome(values_to_score, # hash: toggle descr => hash: slide value => score
                     max_setting)     # string: best setting, "" if there is none

    unless @slide.empty?
      # gnuplot output only if some slide variable has been used.
      # Join all variable names in one go, so that the title does not
      # end in a dangling ", " when there are no toggle variables
      explored = [@slide.var_name] + @toggle.map { |toggle_obj| toggle_obj.var_name }
      title = "Exploring " + explored.join(", ")
      PlotAndREval.gnuplot_direct(values_to_score,
                                  title,
                                  @slide.var_name,
                                  "F-score",
                                  @outfile_prefix + ".ps")
    end

    $stderr.puts "Parameter exploration finished."
    $stderr.puts "Text output was written to #{@outfile_prefix}.txt"
    unless @slide.empty?
      $stderr.puts "Gnuplot output was written to #{@outfile_prefix}.ps"
    end

    unless max_setting.empty?
      $stderr.puts "-----------------------"
      $stderr.puts "Maximum score:"
      $stderr.puts max_setting
    end
  end
end
402
+
@@ -0,0 +1,84 @@
1
+ # FredSplit
2
+ # Katrin Erk April 05
3
+ #
4
+ # Frame disambiguation system:
5
+ # make random split of the training data
6
+ #
7
+ # The split is computed on the basis of the Fred format
8
+ # feature data.
9
+ # The split is recorded in a separate split directory
10
+ # with a very simple system:
11
+ # - one file per feature file, same filename
12
+ # - one line per instance line in feature file
13
+ # - entry in that line is either 'train' or 'test'
14
+
15
+ # Fred packages
16
+ require "fred/FredSplitPkg"
17
+
18
class FredSplit

  ###
  # new
  #
  # read the runtime options (--logID is mandatory, --trainpercent
  # defaults to 90), create the splitting object, and announce the task.
  #
  # raises a RuntimeError if no log ID was given, or if the
  # training percentage is not strictly between 0 and 100
  def initialize(exp_obj, # FredConfigData object
                 options, # hash: runtime option name (string) => value(string)
                 ignore_unambiguous = false)

    in_enduser_mode_unavailable()

    @exp = exp_obj
    @ignore_unambiguous = ignore_unambiguous

    # defaults, possibly overridden by runtime options
    @split_id = nil
    @trainpercent = 0.9

    options.each_pair do |opt, arg|
      if "--logID" == opt
        @split_id = arg
      elsif "--trainpercent" == opt
        # given as a percentage, kept as a fraction
        @trainpercent = arg.to_f / 100.0
      end
      # case of unknown arguments has been dealt with by fred.rb
    end

    # sanity checks: need a log ID and a usable training percentage
    raise "I need a log ID, parameter --logID" if @split_id.nil?

    if @trainpercent <= 0.0 || @trainpercent >= 1.0
      raise "Training percentage needs to be between 1 and 99. I got "+
            (@trainpercent * 100.0).to_i.to_s
    end

    # the object that does the actual splitting
    @split_obj = FredSplitPkg.new(@exp)

    # announce the task
    separator = "---------"
    $stderr.puts separator
    $stderr.puts "Fred experiment #{@exp.get("experiment_ID")}: Making split, using " + (@trainpercent * 100.0).to_i.to_s + "% as training data."
    $stderr.puts separator
  end

  ###
  # remove_split
  #
  # hand the removal of the split with the given ID on to FredSplitPkg
  def self.remove_split(exp,     # FredConfigData object
                        splitID) # string: split ID

    FredSplitPkg.remove_split(exp, splitID)
  end

  ###
  # compute
  #
  # do the splitting: first remove any split stored
  # under this split ID, then make a new one
  def compute()
    self.class.remove_split(@exp, @split_id)
    @split_obj.make_new_split(@split_id, @trainpercent, @ignore_unambiguous)
  end
end