shalmaneser-fred 1.2.0.rc4 → 1.2.rc5

Sign up to get free protection for your applications and to get access to all the features.
Files changed (68) hide show
  1. checksums.yaml +4 -4
  2. data/README.md +47 -18
  3. data/bin/fred +8 -3
  4. data/lib/fred/FredConventions.rb +190 -189
  5. data/lib/fred/abstract_context_provider.rb +246 -0
  6. data/lib/fred/abstract_fred_feature_access.rb +43 -0
  7. data/lib/fred/answer_key_access.rb +130 -0
  8. data/lib/fred/aux_keep_writers.rb +94 -0
  9. data/lib/fred/baseline.rb +153 -0
  10. data/lib/fred/context_provider.rb +55 -0
  11. data/lib/fred/feature_extractors/fred_context_feature_extractor.rb +48 -0
  12. data/lib/fred/feature_extractors/fred_context_pos_feature_extractor.rb +48 -0
  13. data/lib/fred/feature_extractors/fred_feature_extractor.rb +50 -0
  14. data/lib/fred/feature_extractors/fred_ngram_feature_extractor.rb +65 -0
  15. data/lib/fred/feature_extractors/fred_syn_feature_extractor.rb +33 -0
  16. data/lib/fred/feature_extractors/fred_synsem_feature_extractor.rb +32 -0
  17. data/lib/fred/feature_extractors.rb +5 -0
  18. data/lib/fred/file_zipped.rb +43 -0
  19. data/lib/fred/find_all_targets.rb +94 -0
  20. data/lib/fred/find_targets_from_frames.rb +92 -0
  21. data/lib/fred/fred.rb +43 -40
  22. data/lib/fred/fred_error.rb +15 -0
  23. data/lib/fred/fred_eval.rb +311 -0
  24. data/lib/fred/fred_feature_access.rb +420 -0
  25. data/lib/fred/fred_feature_info.rb +56 -0
  26. data/lib/fred/fred_featurize.rb +525 -0
  27. data/lib/fred/fred_parameters.rb +190 -0
  28. data/lib/fred/fred_split.rb +86 -0
  29. data/lib/fred/fred_split_pkg.rb +189 -0
  30. data/lib/fred/fred_test.rb +571 -0
  31. data/lib/fred/fred_train.rb +125 -0
  32. data/lib/fred/grammatical_function_access.rb +63 -0
  33. data/lib/fred/md5.rb +6 -0
  34. data/lib/fred/meta_feature_access.rb +185 -0
  35. data/lib/fred/non_contiguous_context_provider.rb +532 -0
  36. data/lib/fred/opt_parser.rb +182 -161
  37. data/lib/fred/plot_and_r_eval.rb +486 -0
  38. data/lib/fred/single_sent_context_provider.rb +76 -0
  39. data/lib/fred/slide_var.rb +148 -0
  40. data/lib/fred/targets.rb +136 -0
  41. data/lib/fred/toggle_var.rb +61 -0
  42. data/lib/fred/word_lemma_pos_ne.rb +51 -0
  43. data/lib/fred/write_features_binary.rb +95 -0
  44. data/lib/fred/write_features_nary.rb +51 -0
  45. data/lib/fred/write_features_nary_or_binary.rb +51 -0
  46. data/lib/shalmaneser/fred.rb +1 -0
  47. metadata +57 -30
  48. data/lib/fred/Baseline.rb +0 -150
  49. data/lib/fred/FileZipped.rb +0 -31
  50. data/lib/fred/FredBOWContext.rb +0 -877
  51. data/lib/fred/FredDetermineTargets.rb +0 -319
  52. data/lib/fred/FredEval.rb +0 -312
  53. data/lib/fred/FredFeatureExtractors.rb +0 -322
  54. data/lib/fred/FredFeatures.rb +0 -1061
  55. data/lib/fred/FredFeaturize.rb +0 -602
  56. data/lib/fred/FredNumTrainingSenses.rb +0 -27
  57. data/lib/fred/FredParameters.rb +0 -402
  58. data/lib/fred/FredSplit.rb +0 -84
  59. data/lib/fred/FredSplitPkg.rb +0 -180
  60. data/lib/fred/FredTest.rb +0 -606
  61. data/lib/fred/FredTrain.rb +0 -144
  62. data/lib/fred/PlotAndREval.rb +0 -480
  63. data/lib/fred/fred_config_data.rb +0 -185
  64. data/test/frprep/test_opt_parser.rb +0 -94
  65. data/test/functional/functional_test_helper.rb +0 -58
  66. data/test/functional/test_fred.rb +0 -47
  67. data/test/functional/test_frprep.rb +0 -99
  68. data/test/functional/test_rosy.rb +0 -40
@@ -1,402 +0,0 @@
1
- # FredParameters
2
- # Katrin Erk, April 05
3
- #
4
- # Frame disambiguation system:
5
- # test different values for system parameters,
6
- # construct text and graphical output
7
-
8
- # Salsa packages
9
- require "PlotAndREval"
10
-
11
- # Fred packages
12
- require "FredConfigData"
13
- require "FredConventions"
14
- require "FredSplit"
15
- require "FredTrain"
16
- require "FredTest"
17
- require "FredEval"
18
-
19
- ##########################################
20
-
21
- ################
22
- # SlideVar:
23
- # keeps a single sliding variable,
24
- # has an iterator that yields each value of the slide as a pair
25
- # [lhs, rhs] to be passed on to FredConfigData.set_entry()
26
- #
27
- # Initialization with the value of a --slide command line parameter.
28
- # Valid forms:
29
- #
30
- # feature=<f>:<what>:<start>-<end>:<slide>
31
- # with f in { context, ngram, syn, grfunc, fe }
32
- # what in { weight, dist } (dist only available for context)
33
- # start, end, slide floats represented as strings
34
- #
35
- # <var>:<start>-<end>:<slide>
36
- # with var in { smoothing_lambda, window_size }
37
# SlideVar keeps a single sliding variable parsed from the value of a
# --slide command line parameter and iterates over its values.
#
# Accepted forms of the parameter value:
#
#   feature=<f>:<what>:<start>-<end>:<slide>
#   <var>:<start>-<end>:<slide>
#
# An empty string creates an "empty" slide variable (see #empty?).
class SlideVar
  # Argument forms accepted by --slide (see the class comment).
  FEATURE_SLIDE = /^feature=(\w+):(\w+):([\d\.]+)-([\d\.]+):([\d\.]+)$/
  VARIABLE_SLIDE = /^(\w+):([\d\.]+)-([\d\.]+):([\d\.]+)$/

  # Float slide values are rounded to this many decimals to strip
  # floating-point noise such as 0.8500000000000001.
  FLOAT_DIGITS = 10

  # descriptive name of the slide variable ("" for an empty slide variable)
  attr_reader :var_name

  # string: value of the --slide parameter ("" for no slide variable)
  # exp:    experiment file object; only get_lf() and get_type() are
  #         called on it here
  #
  # Raises RuntimeError if the argument cannot be parsed, names a
  # variable that cannot be slid, or has a step size that is not positive.
  def initialize(string, exp)
    # start value, end value and step size of the slide
    @startval = @endval = @step = @current = 0.0

    # what to set in the experiment file for each step of the slide:
    # the rhs contains the string REPLACEME, to be replaced by the current value
    @exp_lhs = ""
    @exp_rhs = ""
    @var_name = ""
    @remove_list_variable_regexp = nil # non-nil if unset_list_entry() is needed

    # empty slide variable
    return if string == ""

    if (match = FEATURE_SLIDE.match(string))
      # --slide feature=ngram:weight:0.8-4.0:0.3
      # --slide feature=context:dist:0.7-0.9:0.05
      init_feature_slide(match, exp)
    elsif (match = VARIABLE_SLIDE.match(string))
      # --slide window_size:0-4:1
      # --slide smoothing_lambda:0.3-0.9:0.05
      init_variable_slide(match, exp)
    else
      raise "Sorry, could not parse argument of --slide. \nI got: " + string
    end

    # a step of zero (e.g. "0.5" truncated to an integer) would never
    # reach the end value
    unless @step > 0
      raise "Error in argument of --slide: the step size must be greater than 0. \nI got: " + string
    end
  end

  # Iterates through each value of the slide variable and sets it in the
  # given experiment file object (via set_entry(), preceded by
  # unset_list_entry() for feature slides).
  #
  # Yields a pair [value, description], where description is a text of
  # the form "<var_name>=<value>". For an empty slide variable, yields
  # [0, ""] exactly once and changes nothing.
  def each_slide_value(exp)
    if empty?
      yield [0, ""]
      return
    end

    # Numeric#step derives each value from the start value and the step
    # count instead of accumulating the step, so the end value is not
    # lost to floating-point drift.
    @startval.step(@endval, @step) do |value|
      value = value.round(FLOAT_DIGITS) if value.is_a?(Float)
      @current = value

      if @remove_list_variable_regexp
        # list feature: unset the old entry before setting the new one
        exp.unset_list_entry(@exp_lhs, @remove_list_variable_regexp)
      end
      exp.set_entry(@exp_lhs, @exp_rhs.sub(/REPLACEME/, value.to_s))

      yield [value, "#{@var_name}=#{value}"]
    end
  end

  # true if this object was created from an empty --slide value
  def empty?
    @exp_lhs.empty?
  end

  private

  # Sets up a slide over the weight (or, for "context", the dist) of a feature.
  def init_feature_slide(match, exp)
    featurename = match[1]
    parname = match[2]
    @startval = match[3].to_f
    @endval = match[4].to_f
    @step = match[5].to_f

    @exp_lhs = "feature"

    if featurename == "context"
      # both weight and dist possible; the other value is kept as it is
      case parname
      when "weight"
        @exp_rhs = "#{featurename} REPLACEME #{exp.get_lf("feature", "context", "wtdist")}"
      when "dist"
        @exp_rhs = "#{featurename} #{exp.get_lf("feature", "context", "weight")} REPLACEME"
      else
        raise "Error in argument of --slide: I found a value of neither 'weight' nor 'dist': " + parname
      end

      @exp_rhs << " mwedist" if exp.get_lf("feature", "context", "mwedist")
    else
      # feature name not "context": only weight possible
      unless parname == "weight"
        raise "Error in argument of --slide: can only do 'weight', what I got is " + parname
      end

      @exp_rhs = "#{featurename} REPLACEME"
    end

    @var_name = "feature #{featurename} #{parname}"
    # a regexp literal, so that \s really is the whitespace class
    @remove_list_variable_regexp = /^#{Regexp.escape(featurename)}\s/
  end

  # Sets up a slide over a plain integer- or float-valued variable.
  def init_variable_slide(match, exp)
    featurename = match[1]

    case exp.get_type(featurename)
    when "integer"
      @startval = match[2].to_i
      @endval = match[3].to_i
      @step = match[4].to_i
    when "float"
      @startval = match[2].to_f
      @endval = match[3].to_f
      @step = match[4].to_f
    else
      raise "Unslidable variable " + featurename
    end

    @exp_lhs = featurename
    @exp_rhs = "REPLACEME"
    @var_name = featurename
  end
end
164
-
165
- ################
166
- # ToggleVar:
167
- # keeps a single toggle variable,
168
- # and has a method that sets this toggle variable to a given value
169
- # in the experiment file data structure.
170
# ToggleVar keeps a single toggle variable and can set it to a given
# boolean value in the experiment file data structure.
class ToggleVar
  # feature dimensions that may be toggled with feature_dim=<dimension>
  FEATURE_DIMENSIONS = %w[word lemma pos ne].freeze

  # variables that are rejected because they take effect during featurization
  FEATURIZATION_VARIABLES = %w[use_fn_gf window_size].freeze

  # descriptive name of the toggle variable
  attr_reader :var_name

  # string: one part of the value of the --toggle parameter
  #         (the value has been split at ':')
  # exp:    experiment file object; only get_type() is called on it here
  #
  # Raises RuntimeError for an unknown feature dimension, for a variable
  # whose type is not "bool", and for variables that cannot be toggled.
  def initialize(string, exp)
    if string =~ /^feature_dim=(\w+)$/
      # feature dimension: a list-valued parameter
      @exp_lhs = "feature_dim"
      @exp_rhs = $1
      @unset_at_false = true # for false, the list entry is removed in set_value_to()
      @var_name = "feature_dim #{@exp_rhs}"

      unless FEATURE_DIMENSIONS.include?(@exp_rhs)
        raise "Unknown feature dimension " + @exp_rhs
      end
    else
      # normal boolean variable
      unless exp.get_type(string) == "bool"
        raise "Unknown value in --toggle: " + string
      end

      if FEATURIZATION_VARIABLES.include?(string)
        raise "Sorry, cannot toggle #{string}, since this variable takes its effect during featurization."
      end

      @exp_lhs = string
      @exp_rhs = "REPLACEME"
      @unset_at_false = false # for false, the parameter is set to "false" in set_value_to()
      @var_name = @exp_lhs
    end
  end

  # Sets the toggle variable to the given boolean in the given
  # experiment file object.
  #
  # boolean: true or false
  # exp:     experiment file object; unset_list_entry() and set_entry()
  #          are called on it
  #
  # Returns a descriptive text of the current setting, "<var_name>=<boolean>".
  def set_value_to(boolean, exp)
    if @unset_at_false
      # List-valued parameter: always remove the entry first, then re-add
      # it for true, so that setting it to true several times in a row
      # cannot accumulate duplicates. This mirrors how SlideVar treats
      # list features.
      # NOTE(review): assumes set_entry() appends to list-valued
      # parameters - confirm against the experiment file class.
      exp.unset_list_entry(@exp_lhs, @exp_rhs)
      exp.set_entry(@exp_lhs, @exp_rhs) if boolean
    else
      exp.set_entry(@exp_lhs, @exp_rhs.sub(/REPLACEME/, boolean.to_s))
    end

    "#{@var_name}=#{boolean}"
  end
end
223
-
224
-
225
- ##########################################
226
- # main class of this package:
227
- # try out different values for system parameters,
228
- # and record the result.
229
- #
230
- # One value can be a slide variable, taking on several numerical values.
231
- # 0 or more values can be toggle variables, taking on the values true and false.
232
# Main class of this package: tries out different values for system
# parameters and records the result.
#
# One value can be a slide variable, taking on several numerical values.
# 0 or more values can be toggle variables, taking on the values true
# and false.
class FredParameters
  # exp_obj: experiment file object (FredConfigData)
  # options: hash: runtime option name (string) => value (string)
  #
  # Recognized options are --slide, --toggle (toggle variables separated
  # by ':') and --output_to (prefix of the output files, default
  # "fred_parameters"). Other options are ignored here.
  def initialize(exp_obj, options)
    in_enduser_mode_unavailable()
    @exp = exp_obj

    # record the slide variable (if any) plus all toggle variables
    @slide = SlideVar.new("", @exp)
    @toggle = []
    @outfile_prefix = "fred_parameters"

    options.each_pair do |opt, arg|
      case opt
      when "--slide"
        @slide = SlideVar.new(arg, @exp)
      when "--toggle"
        arg.split(":").each { |toggle_var| @toggle << ToggleVar.new(toggle_var, @exp) }
      when "--output_to"
        @outfile_prefix = arg
      else
        # case of unknown arguments has been dealt with by fred.rb
      end
    end

    # announce the task
    $stderr.puts "---------"
    $stderr.puts "Fred parameter exploration, experiment #{@exp.get("experiment_ID")}"
    $stderr.puts "---------"
  end

  # Runs the exploration: makes an 80/20 split of the training data,
  # then trains, tests and evaluates once for every combination of toggle
  # values and slide values. Writes one result line per setting to
  # <prefix>.txt, plots to <prefix>.ps if a slide variable is used, and
  # reports the best setting on stderr.
  #
  # Exits with status 1 if the training feature directory cannot be
  # determined. Raises RuntimeError if the text output file cannot be
  # opened for writing.
  def compute
    # the training data needs to have been featurized already
    begin
      fred_dirname(@exp, "train", "features")
    rescue
      $stderr.puts "To experiment with system parameters, please first featurize training data."
      exit 1
    end

    # make new split ID from system time, and make a split with
    # 80% training, 20% test data
    split_id = Time.new.to_f.to_s
    FredSplit.new(@exp,
                  { "--logID" => split_id, "--trainpercent" => "80" },
                  true # ignore unambiguous
                 ).compute

    # values_to_score: hash toggle_values_descr(string) =>
    #                  hash slide_value => score(float)
    values_to_score = {}
    max_setting = ""
    begin
      max_setting = explore(split_id, values_to_score)
    ensure
      # remove the temporary split, also when the exploration failed
      FredSplit.remove_split(@exp, split_id)
    end

    report(values_to_score, max_setting)
  end

  private

  # Evaluates every combination of toggle and slide values on the split
  # and writes one line per setting to the text output file. Fills
  # values_to_score. Returns the text describing the best-scoring
  # setting, or "" if no score was greater than 0.
  def explore(split_id, values_to_score)
    max_score = 0.0
    max_setting = ""

    textout_file = open_text_output
    begin
      # each bit of 'binary' holds the value of one toggle variable
      0.upto(2**@toggle.length - 1) do |binary|
        textout_line = apply_toggle_values(binary)
        values_to_score[textout_line] = {}

        @slide.each_slide_value(@exp) do |slide_value, slide_value_description|
          # progress report
          $stderr.puts "Parameter exploration: #{textout_line} #{slide_value_description}"

          # @exp has been modified to fit the current values of the
          # slide and toggle variables
          score = train_test_evaluate(split_id)

          result_line = textout_line + slide_value_description + " : " + score.to_s
          textout_file.puts result_line
          textout_file.flush
          values_to_score[textout_line][slide_value] = score

          if score > max_score
            max_score = score
            max_setting = result_line
          end
        end
      end
    ensure
      textout_file.close
    end

    max_setting
  end

  # Opens <prefix>.txt for writing; raises RuntimeError if that fails.
  def open_text_output
    File.new(@outfile_prefix + ".txt", "w")
  rescue
    raise "Could not write to output file #{@outfile_prefix}.txt"
  end

  # Sets the i-th toggle variable to true if the i-th bit of 'binary' is
  # set, else to false. Returns the concatenated descriptions of all
  # toggle settings, each followed by a space.
  def apply_toggle_values(binary)
    description = String.new
    @toggle.each_with_index do |toggle_obj, i|
      description << toggle_obj.set_value_to((binary & (2**i)) > 0, @exp) + " "
    end
    description
  end

  # Trains, tests and evaluates on the given split with the current
  # state of @exp. Returns the value of FredEval#f after evaluation.
  def train_test_evaluate(split_id)
    FredTrain.new(@exp, { "--logID" => split_id }).compute
    FredTest.new(@exp, { "--logID" => split_id, "--nooutput" => true }).compute

    evaluation = FredEval.new(@exp, { "--logID" => split_id })
    evaluation.compute(false) # don't print evaluation results to file
    evaluation.f
  end

  # Plots the outcome (only if a slide variable has been used) and
  # reports the output files and the best setting on stderr.
  def report(values_to_score, max_setting)
    unless @slide.empty?
      # joining all names avoids a dangling ", " when there are no toggles
      explored = [@slide.var_name] + @toggle.map { |toggle_obj| toggle_obj.var_name }
      PlotAndREval.gnuplot_direct(values_to_score,
                                  "Exploring " + explored.join(", "),
                                  @slide.var_name,
                                  "F-score",
                                  @outfile_prefix + ".ps")
    end

    $stderr.puts "Parameter exploration finished."
    $stderr.puts "Text output was written to #{@outfile_prefix}.txt"
    unless @slide.empty?
      $stderr.puts "Gnuplot output was written to #{@outfile_prefix}.ps"
    end

    unless max_setting.empty?
      $stderr.puts "-----------------------"
      $stderr.puts "Maximum score:"
      $stderr.puts max_setting
    end
  end
end
402
-
@@ -1,84 +0,0 @@
1
- # FredSplit
2
- # Katrin Erk April 05
3
- #
4
- # Frame disambiguation system:
5
- # make random split of the training data
6
- #
7
- # The split is computed on the basis of the Fred format
8
- # feature data.
9
- # The split is recorded in a separate split directory
10
- # with a very simple system:
11
- # - one file per feature file, same filename
12
- # - one line per instance line in feature file
13
- # - entry in that line is either 'train' or 'test'
14
-
15
- # Fred packages
16
- require "fred/FredSplitPkg"
17
-
18
# FredSplit: task object that makes a random split of the training data.
# The actual work is delegated to FredSplitPkg.
class FredSplit
  # Evaluates the runtime options and announces the task.
  #
  # exp_obj:            experiment file object (FredConfigData)
  # options:            hash: runtime option name (string) => value (string);
  #                     --logID (required) is the ID of the split,
  #                     --trainpercent the percentage of training data
  #                     (default 90)
  # ignore_unambiguous: passed on to FredSplitPkg#make_new_split
  #
  # Raises RuntimeError if no --logID is given or if the training
  # percentage is not strictly between 0 and 100.
  def initialize(exp_obj, options, ignore_unambiguous = false)
    in_enduser_mode_unavailable()

    @exp = exp_obj
    @ignore_unambiguous = ignore_unambiguous

    # defaults, possibly overridden by the runtime options
    @split_id = nil
    @trainpercent = 0.9

    options.each_pair do |opt, arg|
      # unknown arguments have been dealt with by fred.rb
      if opt == "--logID"
        @split_id = arg
      elsif opt == "--trainpercent"
        @trainpercent = arg.to_f / 100.0
      end
    end

    # sanity checks
    raise "I need a log ID, parameter --logID" if @split_id.nil?

    if @trainpercent <= 0.0 || @trainpercent >= 1.0
      raise "Training percentage needs to be between 1 and 99. I got " +
            (@trainpercent * 100.0).to_i.to_s
    end

    # the object that does the splitting
    @split_obj = FredSplitPkg.new(@exp)

    # announce the task
    percent = (@trainpercent * 100.0).to_i
    $stderr.puts "---------"
    $stderr.puts "Fred experiment #{@exp.get("experiment_ID")}: Making split, using #{percent}% as training data."
    $stderr.puts "---------"
  end

  # Removes the split with the given ID.
  #
  # exp:     experiment file object (FredConfigData)
  # splitID: string: split ID
  def self.remove_split(exp, splitID)
    FredSplitPkg.remove_split(exp, splitID)
  end

  # Does the splitting: removes any split stored under this split ID,
  # then makes a new one.
  def compute
    FredSplit.remove_split(@exp, @split_id)
    @split_obj.make_new_split(@split_id, @trainpercent, @ignore_unambiguous)
  end
end
@@ -1,180 +0,0 @@
1
- ##
2
- # splitting package for WSD:
3
- # compute a split for feature files (one item a line, CSV),
4
- # and apply pre-computed split
5
- # to produce new feature files accordingly
6
-
7
- require "tempfile"
8
-
9
- require "fred/FredDetermineTargets"
10
- require "fred/FredConventions"
11
-
12
# Splitting package for WSD: computes a split for feature files (one
# item per line) and applies a pre-computed split to produce new feature
# files accordingly.
class FredSplitPkg
  # exp: experiment file object, passed on to the directory helpers
  def initialize(exp)
    @exp = exp
  end

  # Returns the name of the directory that holds the split with the
  # given ID, as determined by fred_dirname().
  def FredSplitPkg.split_dir(exp, split_id, mode = "existing")
    fred_dirname(exp, "split", split_id, mode)
  end

  # Makes a new split.
  #
  # For each known lemma, writes a split file with one line per line of
  # the lemma's training feature file. Each line is "train" or "test",
  # chosen at random, or "ignore" if ignore_unambiguous is set and the
  # lemma has fewer than two senses.
  #
  # split_id:           string: ID of the split
  # trainpercent:       float: a line is labeled "train" if rand() is
  #                     below this value
  # ignore_unambiguous: boolean, see above
  #
  # Exits with status 1 if the list of known targets cannot be read.
  # Raises RuntimeError if a split file cannot be written or a feature
  # file cannot be read.
  def make_new_split(split_id, trainpercent, ignore_unambiguous = false)
    # where to store the split?
    split_dir = FredSplitPkg.split_dir(@exp, split_id, "new")

    lemmas_and_senses = Targets.new(@exp, nil, "r")
    unless lemmas_and_senses.targets_okay
      # error during initialization
      $stderr.puts "Error: Could not read list of known targets, bailing out."
      exit 1
    end

    # Do the split only once per lemma,
    # even if we have sense-specific feature files
    feature_dir = fred_dirname(@exp, "train", "features")

    lemmas_and_senses.get_lemmas.each do |lemma|
      splitfilename = split_dir + fred_split_filename(lemma)
      begin
        splitfile = File.new(splitfilename, "w")
      rescue
        raise "Error: Couldn't write to file " + splitfilename
      end

      begin
        filename = find_feature_file(feature_dir, lemma)
        unless filename
          $stderr.puts "Warning: split: no feature file found for #{lemma}, skipping."
          next
        end

        begin
          file = File.new(filename)
        rescue
          raise "Couldn't read feature file " + filename
        end

        begin
          unambiguous = ignore_unambiguous &&
                        lemmas_and_senses.get_senses(lemma).length < 2

          # one label per line of the feature file
          while file.gets
            if unambiguous
              splitfile.puts "ignore"
            elsif rand < trainpercent
              splitfile.puts "train"
            else
              splitfile.puts "test"
            end
          end
        ensure
          file.close
        end
      ensure
        # runs on 'next' and on errors as well
        splitfile.close
      end
    end
  end

  # Removes an old split. Does nothing if the split directory cannot be
  # determined.
  #
  # exp:     experiment file object
  # splitID: string: split ID
  def FredSplitPkg.remove_split(exp, splitID)
    begin
      split_dir = FredSplitPkg.split_dir(exp, splitID, "new")
    rescue
      # no split to be removed
      return
    end

    # argument list form: the directory name is not interpreted by a shell
    system("rm", "-rf", split_dir)
  end

  # Filters a feature file according to a pre-computed split.
  #
  # filename: feature file
  # lemma:    string: lemma that filename is about
  # dataset:  string: label of the lines to keep ("train" or "test")
  # split_id: string: split ID
  #
  # Returns a closed Tempfile containing the lines of the feature file
  # whose split label equals dataset, or nil if there is no such line.
  #
  # Raises RuntimeError if the split file has fewer lines than the
  # feature file. (The original code raised the placeholder message
  # "HIER" here, in front of an unreachable partial-result fallback.)
  def apply_split(filename, lemma, dataset, split_id)
    split_filename = FredSplitPkg.split_dir(@exp, split_id) +
                     fred_split_filename(lemma)

    f_out = Tempfile.new("fred_split")
    num_yes = 0

    begin
      # read feature file and split file in parallel
      File.open(filename) do |f_feat|
        File.open(split_filename) do |f_split|
          f_feat.each do |line|
            split_part = f_split.gets
            if split_part.nil?
              $stderr.puts "FredSplit error: split file too short."
              $stderr.puts "Split file: #{split_filename}"
              $stderr.puts "Feature file: #{filename}"
              raise "FredSplit error: split file #{split_filename} " \
                    "has fewer lines than feature file #{filename}"
            end

            # keep this item if it is in the requested part of the split
            if split_part.chomp == dataset
              f_out.puts line
              num_yes += 1
            end
          end
        end
      end
    rescue
      # do not leave the temporary file behind on errors
      f_out.close(true)
      raise
    end

    if num_yes > 0
      f_out.close
      f_out
    else
      f_out.close(true)
      nil
    end
  end

  private

  # Returns the name of the feature file for the given lemma: the
  # lemma-specific file if it exists, else the first lemma+sense-specific
  # file, else nil.
  def find_feature_file(feature_dir, lemma)
    filename = feature_dir + fred_feature_filename(lemma)
    return filename if File.exist?(filename)

    Dir[feature_dir + fred_feature_filename(lemma, "*", true)].first
  end
end