shalmaneser-rosy 1.2.0.rc4 → 1.2.rc5

Sign up to get free protection for your applications and to get access to all the features.
Files changed (41) hide show
  1. checksums.yaml +4 -4
  2. data/README.md +47 -18
  3. data/bin/rosy +14 -7
  4. data/lib/rosy/FailedParses.rb +22 -20
  5. data/lib/rosy/FeatureInfo.rb +35 -31
  6. data/lib/rosy/GfInduce.rb +132 -130
  7. data/lib/rosy/GfInduceFeature.rb +86 -68
  8. data/lib/rosy/InputData.rb +59 -55
  9. data/lib/rosy/RosyConfusability.rb +47 -40
  10. data/lib/rosy/RosyEval.rb +55 -55
  11. data/lib/rosy/RosyFeatureExtractors.rb +295 -290
  12. data/lib/rosy/RosyFeaturize.rb +54 -67
  13. data/lib/rosy/RosyInspect.rb +52 -50
  14. data/lib/rosy/RosyIterator.rb +73 -67
  15. data/lib/rosy/RosyPhase2FeatureExtractors.rb +48 -48
  16. data/lib/rosy/RosyPruning.rb +39 -31
  17. data/lib/rosy/RosyServices.rb +116 -115
  18. data/lib/rosy/RosySplit.rb +55 -53
  19. data/lib/rosy/RosyTask.rb +7 -3
  20. data/lib/rosy/RosyTest.rb +174 -191
  21. data/lib/rosy/RosyTrain.rb +46 -50
  22. data/lib/rosy/RosyTrainingTestTable.rb +101 -99
  23. data/lib/rosy/TargetsMostFrequentFrame.rb +13 -9
  24. data/lib/rosy/{AbstractFeatureAndExternal.rb → abstract_feature_extractor.rb} +22 -97
  25. data/lib/rosy/abstract_single_feature_extractor.rb +52 -0
  26. data/lib/rosy/external_feature_extractor.rb +35 -0
  27. data/lib/rosy/opt_parser.rb +231 -201
  28. data/lib/rosy/rosy.rb +63 -64
  29. data/lib/rosy/rosy_conventions.rb +66 -0
  30. data/lib/rosy/rosy_error.rb +15 -0
  31. data/lib/rosy/var_var_restriction.rb +16 -0
  32. data/lib/shalmaneser/rosy.rb +1 -0
  33. metadata +26 -19
  34. data/lib/rosy/ExternalConfigData.rb +0 -58
  35. data/lib/rosy/View.rb +0 -418
  36. data/lib/rosy/rosy_config_data.rb +0 -121
  37. data/test/frprep/test_opt_parser.rb +0 -94
  38. data/test/functional/functional_test_helper.rb +0 -58
  39. data/test/functional/test_fred.rb +0 -47
  40. data/test/functional/test_frprep.rb +0 -99
  41. data/test/functional/test_rosy.rb +0 -40
@@ -7,28 +7,22 @@
7
7
  # Ruby standard library
8
8
  require "tempfile"
9
9
 
10
-
11
10
  # Rosy packages
12
11
  require "rosy/RosyTask"
13
12
  require "rosy/RosyTest"
14
- require "common/RosyConventions"
13
+ require 'rosy/rosy_conventions'
15
14
  require "rosy/RosyIterator"
16
15
  require "rosy/RosyTrainingTestTable"
17
- require "rosy/RosyPruning"
18
- require "common/ML"
19
-
20
- # Frprep packages
21
- #require "common/prep_config_data"
16
+ # require "rosy/RosyPruning"
17
+ require 'ml/classifier'
22
18
 
19
+ module Shalmaneser
20
+ module Rosy
23
21
  class RosyTrain < RosyTask
24
22
 
25
23
  def initialize(exp, # RosyConfigData object: experiment description
26
- opts, # hash: runtime argument option (string) -> value (string)
27
- ttt_obj) # RosyTrainingTestTable object
28
-
29
- #####
30
- # In enduser mode, this whole task is unavailable
31
- in_enduser_mode_unavailable()
24
+ opts, # hash: runtime argument option (string) -> value (string)
25
+ ttt_obj) # RosyTrainingTestTable object
32
26
 
33
27
  ##
34
28
  # remember the experiment description
@@ -46,21 +40,21 @@ class RosyTrain < RosyTask
46
40
  opts.each { |opt,arg|
47
41
  case opt
48
42
  when "--step"
49
- unless ["argrec", "arglab", "onestep", "both"].include? arg
50
- raise "Classification step must be one of: argrec, arglab, both, onestep. I got: " + arg.to_s
51
- end
52
- @step = arg
43
+ unless ["argrec", "arglab", "onestep", "both"].include? arg
44
+ raise "Classification step must be one of: argrec, arglab, both, onestep. I got: " + arg.to_s
45
+ end
46
+ @step = arg
53
47
  when "--logID"
54
48
  @splitID = arg
55
49
  else
56
- # this is an option that is okay but has already been read and used by rosy.rb
57
- end
50
+ # this is an option that is okay but has already been read and used by rosy.rb
51
+ end
58
52
  }
59
53
 
60
54
  ##
61
55
  # check: if this is about a split, do we have it?
62
56
  if @splitID
63
- unless @ttt_obj.splitIDs().include?(@splitID)
57
+ unless @ttt_obj.splitIDs.include?(@splitID)
64
58
  $stderr.puts "Sorry, I have no data for split ID #{@splitID}."
65
59
  exit 0
66
60
  end
@@ -80,9 +74,9 @@ class RosyTrain < RosyTask
80
74
  # $stderr.puts "Parameter preproc_descr_file_train has to be a readable file."
81
75
  # exit 1
82
76
  # end
83
- # preproc_exp = FrPrepConfigData.new(preproc_expname)
77
+ # preproc_exp = FrappeConfigData.new(preproc_expname)
84
78
  # @exp.adjoin(preproc_exp)
85
-
79
+
86
80
 
87
81
  # get_lf returns: array of pairs [classifier_name, options[array]]
88
82
  #
@@ -101,7 +95,7 @@ class RosyTrain < RosyTask
101
95
  if @splitID
102
96
  $stderr.puts "on split dataset #{@splitID}"
103
97
  else
104
- $stderr.puts "on the complete training dataset"
98
+ $stderr.puts "on the complete training dataset"
105
99
  end
106
100
  $stderr.puts "---------"
107
101
  end
@@ -110,20 +104,20 @@ class RosyTrain < RosyTask
110
104
  # perform
111
105
  #
112
106
  # run the training step(s) selected via the --step option
113
- def perform()
107
+ def perform
114
108
 
115
109
  if @step == "both"
116
110
  # both? then do first argrec, then arglab
117
111
  $stderr.puts "Rosy training step argrec"
118
112
  @step = "argrec"
119
- perform_aux()
113
+ perform_aux
120
114
  $stderr.puts "Rosy training step arglab"
121
115
  @step = "arglab"
122
- perform_aux()
116
+ perform_aux
123
117
  else
124
118
  # not both? then just do one
125
119
  $stderr.puts "Rosy training step #{@step}"
126
- perform_aux()
120
+ perform_aux
127
121
  end
128
122
  end
129
123
 
@@ -133,13 +127,13 @@ class RosyTrain < RosyTask
133
127
  # perform_aux: do the actual work of the perform() method
134
128
  # moved here because of the possibility of having @step=="both",
135
129
  # which makes it necessary to perform two training steps one after the other
136
- def perform_aux()
130
+ def perform_aux
137
131
 
138
132
  if @step == "arglab" and not(@exp.get("assume_argrec_perfect"))
139
-
133
+
140
134
  # KE Jan 31, 06: always redo computation of argrec on training data.
141
135
  # We have had trouble with leftover runlogs too often
142
-
136
+
143
137
  # i.e. apply argrec classifiers to argrec training data
144
138
  $stderr.puts "Rosy: Applying argrec classifiers to argrec training data"
145
139
  $stderr.puts " to produce arglab training input"
@@ -147,10 +141,10 @@ class RosyTrain < RosyTask
147
141
  { "--nooutput" => nil,
148
142
  "--logID" => @splitID,
149
143
  "--step" => "argrec"},
150
- @ttt_obj,
144
+ @ttt_obj,
151
145
  true) # argrec_apply: see above
152
-
153
- apply_obj.perform()
146
+
147
+ apply_obj.perform
154
148
  end
155
149
 
156
150
  # hand all the info to the RosyIterator object
@@ -160,12 +154,12 @@ class RosyTrain < RosyTask
160
154
  # RosyIterator will add the appropriate DB column restrictions
161
155
  # such that pruned constituents do not enter into training
162
156
 
163
- @iterator = RosyIterator.new(@ttt_obj, @exp, "train",
164
- "step" => @step,
165
- "splitID" => @splitID,
157
+ @iterator = RosyIterator.new(@ttt_obj, @exp, "train",
158
+ "step" => @step,
159
+ "splitID" => @splitID,
166
160
  "prune" => true)
167
161
 
168
- if @iterator.num_groups() == 0
162
+ if @iterator.num_groups == 0
169
163
  # no groups:
170
164
  # may have been a problem with pruning.
171
165
  $stderr.puts
@@ -178,13 +172,13 @@ class RosyTrain < RosyTask
178
172
  $stderr.puts
179
173
  end
180
174
 
181
-
175
+
182
176
  ####
183
177
  # get the list of relevant features,
184
- # remove the feature that describes the unit by which we train,
178
+ # remove the feature that describes the unit by which we train,
185
179
  # since it is going to be constant throughout the training file
186
- @features = @ttt_obj.feature_info.get_model_features(@step) -
187
- @iterator.get_xwise_column_names()
180
+ @features = @ttt_obj.feature_info.get_model_features(@step) -
181
+ @iterator.get_xwise_column_names
188
182
  # but add the gold feature
189
183
  unless @features.include? "gold"
190
184
  @features << "gold"
@@ -192,7 +186,7 @@ class RosyTrain < RosyTask
192
186
 
193
187
  ####
194
188
  #for each frame/ for each target POS:
195
- classif_dir = classifier_directory_name(@exp,@step, @splitID)
189
+ classif_dir = ::Shalmaneser::Rosy::classifier_directory_name(@exp,@step, @splitID)
196
190
 
197
191
  @iterator.each_group { |group_descr_hash, group|
198
192
 
@@ -201,34 +195,36 @@ class RosyTrain < RosyTask
201
195
  # get a view: model features, restrict frame/targetPOS to current group
202
196
 
203
197
  view = @iterator.get_a_view_for_current_group(@features)
204
-
198
+
205
199
  # make input file for classifiers:
206
200
  # one instance per line, comma-separated list of features,
207
201
  # last feature is the gold label.
208
202
  tf = Tempfile.new("rosy")
209
-
203
+
210
204
  view.each_instance_s { |instance_string|
211
205
  # change punctuation to _PUNCT_
212
206
  # and change empty space to _
213
207
  # because otherwise some classifiers may spit
214
- tf.puts prepare_output_for_classifiers(instance_string)
208
+ tf.puts Rosy::prepare_output_for_classifiers(instance_string)
215
209
  }
216
- tf.close()
210
+ tf.close
217
211
 
218
212
  # train classifiers
219
213
  @classifiers.each { |classifier, classifier_name|
220
-
214
+
221
215
  # if an explicit classifier dir is given, use that one
222
216
  output_name = classif_dir + @exp.instantiate("classifier_file",
223
217
  "classif" => classifier_name,
224
218
  "group" => group.gsub(/ /, "_"))
225
- classifier.train(tf.path(), output_name)
219
+ classifier.train(tf.path, output_name)
226
220
  }
227
221
 
228
222
  # clean up
229
223
  tf.close(true)
230
- view.close()
224
+ view.close
231
225
  }
232
-
226
+
233
227
  end
234
228
  end
229
+ end
230
+ end
@@ -19,23 +19,26 @@
19
19
  # - index matching the training table index column
20
20
  # - phase 2 features
21
21
  #
22
- # for all tables, training, test and split, there is
22
+ # for all tables, training, test and split, there is
23
23
  # a list of learner application results,
24
24
  # i.e. the labels assigned to instances by some learner
25
25
  # in some learner application run.
26
26
  # For the training table there are classification results for
27
27
  # argrec applied to training data.
28
- # For each split table there are classification results for
28
+ # For each split table there are classification results for
29
29
  # the test part of the split.
30
30
  # For the test tables there are classification results for the test data.
31
- # The runlog for each DB table lists the conditions of each run
31
+ # The runlog for each DB table lists the conditions of each run
32
32
  # (which model features, argrec/arglab/onestep, etc.)
33
33
 
34
- require "common/ruby_class_extensions"
34
+ require "ruby_class_extensions"
35
35
 
36
36
  require 'db/db_table'
37
37
  require "rosy/FeatureInfo"
38
+ require 'rosy/rosy_conventions'
38
39
 
40
+ module Shalmaneser
41
+ module Rosy
39
42
  # @note AB: Possibly this file belongs to <lib/db>. Check it!
40
43
  ######################
41
44
  class RosyTrainingTestTable
@@ -43,7 +46,7 @@ class RosyTrainingTestTable
43
46
 
44
47
  ######
45
48
  # data structures for this class
46
- # TttLog: contains known test IDs, splitIDs, runlogs for this
49
+ # TttLog: contains known test IDs, splitIDs, runlogs for this
47
50
  # experiment.
48
51
  # testIDs: Array(string) known test IDs
49
52
  # splitIDs: Array(string) known split IDs
@@ -59,9 +62,9 @@ class RosyTrainingTestTable
59
62
  # an integer: take the list of feature names for this experiment
60
63
  # in alphabetical order, then set a bit to one if the
61
64
  # corresponding feature is in the list of model features
62
- # xwise: string, xwise for this classification run,
63
- # concatenation of the names of one or more
64
- # features (on which groups of instances
65
+ # xwise: string, xwise for this classification run,
66
+ # concatenation of the names of one or more
67
+ # features (on which groups of instances
65
68
  # was the learner trained?)
66
69
  # column: string, name of the DB table column with the results
67
70
  # of this classification run
@@ -74,7 +77,7 @@ class RosyTrainingTestTable
74
77
 
75
78
  ###
76
79
  def initialize(exp, # RosyConfigData object
77
- database) # Mysql object
80
+ database) # Mysql object
78
81
  @exp = exp
79
82
  @feature_info = RosyFeatureInfo.new(@exp)
80
83
  @database = database
@@ -84,21 +87,21 @@ class RosyTrainingTestTable
84
87
  # name prefix of classifier columns
85
88
  @addcol_prefix = @exp.get("classif_column_name")
86
89
  # name of the main table
87
- @maintable_name = @exp.instantiate("main_table_name",
88
- "exp_ID" => @exp.get("experiment_ID"))
90
+ @maintable_name = @exp.instantiate("main_table_name",
91
+ "exp_ID" => @exp.get("experiment_ID"))
89
92
  # list of pairs [name, mysql format] for each feature (string*string)
90
- @feature_columns = @feature_info.get_column_formats()
93
+ @feature_columns = @feature_info.get_column_formats
91
94
  # list of feature names (strings)
92
- @feature_names = @feature_info.get_column_names()
95
+ @feature_names = @feature_info.get_column_names
93
96
  # make empty columns for classification results:
94
97
  # list of pairs [name, mysql format] for each classifier column (string*string)
95
98
  @classif_columns = Range.new(0,10).map {|id|
96
99
  [
97
- classifcolumn_name(id),
98
- "VARCHAR(20)"
100
+ classifcolumn_name(id),
101
+ "VARCHAR(20)"
99
102
  ]
100
103
  }
101
- # columns for split tables:
104
+ # columns for split tables:
102
105
  # the main table's sentence ID column.
103
106
  # later to be added: split index column copying the main table's index column
104
107
  @split_columns = @feature_columns.select { |name, type|
@@ -106,15 +109,15 @@ class RosyTrainingTestTable
106
109
  }
107
110
 
108
111
  ###
109
- # start the data structure for keeping lists of
110
- # test and split IDs, classification run logs etc.
112
+ # start the data structure for keeping lists of
113
+ # test and split IDs, classification run logs etc.
111
114
  # test whether there is a pickle file.
112
115
  # if so, read it
113
- success = from_file()
116
+ success = from_file
114
117
  unless success
115
118
  # pickle file couldn't be read
116
119
  # initialize to empty object
117
- @log_obj = TttLog.new(Array.new, Array.new, Hash.new)
120
+ @log_obj = TttLog.new([], [], {})
118
121
  end
119
122
  end
120
123
 
@@ -129,7 +132,7 @@ class RosyTrainingTestTable
129
132
  return
130
133
  end
131
134
  Marshal.dump(@log_obj, file)
132
- file.close()
135
+ file.close
133
136
  end
134
137
 
135
138
  def from_file(dir = nil)
@@ -139,7 +142,7 @@ class RosyTrainingTestTable
139
142
  file = File.new(filename)
140
143
  begin
141
144
  @log_obj = Marshal.load(file)
142
- rescue
145
+ rescue
143
146
  # something went wrong, for example an empty pickle file
144
147
  $stderr.puts "ROSY warning: could not read pickle #{filename}, assuming empty."
145
148
  return false
@@ -148,7 +151,7 @@ class RosyTrainingTestTable
148
151
  if dir
149
152
  # load from a different file than the normal one?
150
153
  # then save this log to the normal file too
151
- to_file()
154
+ to_file
152
155
  end
153
156
 
154
157
  return true
@@ -165,10 +168,10 @@ class RosyTrainingTestTable
165
168
  def testtable_name(testID)
166
169
  # no test ID given? use default
167
170
  unless testID
168
- testID = default_test_ID()
171
+ testID = Rosy::default_test_ID
169
172
  end
170
173
 
171
- return @exp.instantiate("test_table_name",
174
+ return @exp.instantiate("test_table_name",
172
175
  "exp_ID" => @exp.get("experiment_ID"),
173
176
  "test_ID" => testID)
174
177
  end
@@ -182,15 +185,15 @@ class RosyTrainingTestTable
182
185
  return "rosy_#{@exp.get("experiment_ID")}_split_#{dataset}_#{splitID}"
183
186
  end
184
187
 
185
- ###
188
+ ###
186
189
  # returns: test IDs for the current experiment (list of strings)
187
- def testIDs()
190
+ def testIDs
188
191
  return @log_obj.testIDs
189
192
  end
190
193
 
191
- ###
194
+ ###
192
195
  # returns: split IDs for the current experiment (list of strings)
193
- def splitIDs()
196
+ def splitIDs
194
197
  return @log_obj.splitIDs
195
198
  end
196
199
 
@@ -210,12 +213,12 @@ class RosyTrainingTestTable
210
213
  if (rl = existing_runlog_aux(loglist, runlog))
211
214
  # runlog already exists
212
215
  return rl.column
213
-
216
+
214
217
  else
215
218
  # runlog does not exist yet.
216
219
  # find the first free column
217
220
  existing_cols = loglist.select { |rl| rl.okay }.map { |rl| rl.column }
218
- @classif_columns.each { |colname, format|
221
+ @classif_columns.each { |colname, format|
219
222
 
220
223
  unless existing_cols.include? colname
221
224
  # found an unused column name:
@@ -231,7 +234,7 @@ class RosyTrainingTestTable
231
234
  # So we have to extend the table.
232
235
  # First find out the complete list of used column names:
233
236
  # all table columns starting with @addcol_prefix
234
- used_classif_columns = Hash.new
237
+ used_classif_columns = {}
235
238
  @database.list_column_names(table_name).each { |column_name|
236
239
  if column_name =~ /^#{@addcol_prefix}/
237
240
  used_classif_columns[column_name] = true
@@ -256,12 +259,12 @@ class RosyTrainingTestTable
256
259
  raise e
257
260
  end
258
261
  puts "Finished adding column at "+Time.now.to_s
259
-
262
+
260
263
  # now use that column
261
264
  runlog.column = colname
262
265
  add_to_runlog(table_name, runlog)
263
266
  return colname
264
- end
267
+ end
265
268
  end
266
269
 
267
270
  ###
@@ -279,7 +282,7 @@ class RosyTrainingTestTable
279
282
  return rl.column
280
283
  else
281
284
  return nil
282
- end
285
+ end
283
286
  end
284
287
 
285
288
  ###
@@ -293,13 +296,13 @@ class RosyTrainingTestTable
293
296
  splitID, # string (splitID) or nil
294
297
  runID) # string: run ID
295
298
  loglist = get_runlogs(proper_table_for_runlog(step, dataset, testID, splitID))
296
- rl = loglist.detect { |rl|
299
+ rl = loglist.detect { |rl|
297
300
  rl.column == runID
298
301
  }
299
302
  if rl
300
303
  rl.okay = true
301
304
  end
302
- to_file()
305
+ to_file
303
306
  end
304
307
 
305
308
 
@@ -309,7 +312,7 @@ class RosyTrainingTestTable
309
312
  column_name) # string: name of the run column
310
313
  loglist = get_runlogs(table_name)
311
314
  loglist.delete_if { |rl| rl.column == column_name }
312
- to_file()
315
+ to_file
313
316
  end
314
317
 
315
318
  ###
@@ -318,8 +321,8 @@ class RosyTrainingTestTable
318
321
  # for all tables of this experiment
319
322
  #
320
323
  # If all runlogs are empty, returns "none known"
321
- def runlog_to_s()
322
- hashes = runlog_to_s_list()
324
+ def runlog_to_s
325
+ hashes = runlog_to_s_list
323
326
 
324
327
  # join text from hashes into a string, omit tables without runs
325
328
  string = ""
@@ -342,43 +345,43 @@ class RosyTrainingTestTable
342
345
  ###
343
346
  # runlog_to_s_list:
344
347
  # returns a list of hashes with keys "table_name", "header", "runlist"
345
- # where header is a string describing one of
346
- # the DB tables of this experiment,
348
+ # where header is a string describing one of
349
+ # the DB tables of this experiment,
347
350
  # and runlist is a list of pairs [ column_name, text],
348
351
  # where text describes the classification run in the column column_name
349
- def runlog_to_s_list()
350
- retv = Array.new
351
-
352
+ def runlog_to_s_list
353
+ retv = []
354
+
352
355
  # main table
353
356
  retv << one_runlog_to_s("train", nil, nil)
354
357
 
355
358
  # test tables
356
- testIDs().each { |testID|
359
+ testIDs.each { |testID|
357
360
  retv << one_runlog_to_s("test", testID, nil)
358
361
  }
359
362
  # split tables
360
- splitIDs().each { |splitID|
363
+ splitIDs.each { |splitID|
361
364
  ["train", "test"].each { |dataset|
362
365
  retv << one_runlog_to_s(dataset, nil, splitID)
363
- }
366
+ }
364
367
  }
365
368
 
366
369
  return retv
367
370
  end
368
-
371
+
369
372
  #######
370
373
  # create new training/test/split table
371
- def new_train_table()
374
+ def new_train_table
372
375
 
373
376
  # remove old runlogs, if they exist
374
377
  del_runlogs(@maintable_name)
375
378
 
376
379
  # make table
377
380
  return DBTable.new(@database, @maintable_name,
378
- "new",
379
- "col_formats" => @feature_columns + @classif_columns,
380
- "index_cols" => @feature_info.get_index_columns(),
381
- "addcol_prefix" => @addcol_prefix)
381
+ "new",
382
+ "col_formats" => @feature_columns + @classif_columns,
383
+ "index_cols" => @feature_info.get_index_columns,
384
+ "addcol_prefix" => @addcol_prefix)
382
385
  end
383
386
 
384
387
  ###
@@ -390,16 +393,16 @@ class RosyTrainingTestTable
390
393
  # remember test ID
391
394
  unless @log_obj.testIDs.include? testID
392
395
  @log_obj.testIDs << testID
393
- to_file()
396
+ to_file
394
397
  end
395
398
 
396
399
  # make table
397
400
  return DBTable.new(@database,
398
401
  testtable_name(testID),
399
- "new",
400
- "col_formats" => @feature_columns + @classif_columns,
401
- "index_cols" => @feature_info.get_index_columns(),
402
- "addcol_prefix" => @addcol_prefix)
402
+ "new",
403
+ "col_formats" => @feature_columns + @classif_columns,
404
+ "index_cols" => @feature_info.get_index_columns,
405
+ "addcol_prefix" => @addcol_prefix)
403
406
 
404
407
  end
405
408
 
@@ -414,11 +417,11 @@ class RosyTrainingTestTable
414
417
  # remember split ID
415
418
  unless @log_obj.splitIDs.include? splitID
416
419
  @log_obj.splitIDs << splitID
417
- to_file()
420
+ to_file
418
421
  end
419
422
 
420
423
  # determine the type of the index column
421
- maintable = existing_train_table()
424
+ maintable = existing_train_table
422
425
  index_name_and_type = maintable.list_column_formats.assoc(maintable.index_name)
423
426
  if index_name_and_type
424
427
  split_index_type = index_name_and_type.last
@@ -429,31 +432,31 @@ class RosyTrainingTestTable
429
432
  end
430
433
 
431
434
  # make table
432
- return DBTable.new(@database,
435
+ return DBTable.new(@database,
433
436
  splittable_name(splitID, dataset),
434
437
  "new",
435
438
  "col_formats" => @split_columns + [[split_index_colname, split_index_type]] + @classif_columns,
436
- "index_cols" => [split_index_colname],
439
+ "index_cols" => [split_index_colname],
437
440
  "addcol_prefix" => @addcol_prefix)
438
441
  end
439
442
 
440
443
 
441
444
  #######
442
445
  # open existing training or test table
443
- def existing_train_table()
446
+ def existing_train_table
444
447
  return DBTable.new(@database, @maintable_name,
445
- "open",
446
- "col_names" => @feature_names,
447
- "addcol_prefix" => @addcol_prefix)
448
+ "open",
449
+ "col_names" => @feature_names,
450
+ "addcol_prefix" => @addcol_prefix)
448
451
  end
449
452
 
450
453
  ###
451
454
  def existing_test_table(testID = "apply")
452
455
  return DBTable.new(@database,
453
456
  testtable_name(testID),
454
- "open",
455
- "col_names" => @feature_names,
456
- "addcol_prefix" => @addcol_prefix)
457
+ "open",
458
+ "col_names" => @feature_names,
459
+ "addcol_prefix" => @addcol_prefix)
457
460
  end
458
461
 
459
462
  ###
@@ -463,7 +466,7 @@ class RosyTrainingTestTable
463
466
 
464
467
  return DBTable.new(@database,
465
468
  splittable_name(splitID, dataset),
466
- "open",
469
+ "open",
467
470
  "col_names" => @split_columns.map { |name, type| name} + [split_index_colname],
468
471
  "addcol_prefix" => @addcol_prefix)
469
472
  end
@@ -472,26 +475,26 @@ class RosyTrainingTestTable
472
475
  # table existence tests
473
476
 
474
477
  ###
475
- def train_table_exists?()
476
- return @database.list_tables().include?(@maintable_name)
478
+ def train_table_exists?
479
+ return @database.list_tables.include?(@maintable_name)
477
480
  end
478
481
 
479
482
  ###
480
483
  def test_table_exists?(testID) # string
481
- return @database.list_tables().include?(testtable_name(testID))
484
+ return @database.list_tables.include?(testtable_name(testID))
482
485
  end
483
486
 
484
487
  ###
485
488
  def split_table_exists?(splitID, # string
486
489
  dataset) # string: train/test
487
- return @database.list_tables().include?(splittable_name(splitID, dataset))
490
+ return @database.list_tables.include?(splittable_name(splitID, dataset))
488
491
  end
489
492
 
490
493
  ##################
491
494
  # remove tables
492
495
 
493
496
  ###
494
- def remove_train_table()
497
+ def remove_train_table
495
498
  if train_table_exists?
496
499
  del_runlogs(@maintable_name)
497
500
  remove_table(@maintable_name)
@@ -502,7 +505,7 @@ class RosyTrainingTestTable
502
505
  def remove_test_table(testID) # string
503
506
  # remove ID from log
504
507
  @log_obj.testIDs.delete(testID)
505
- to_file()
508
+ to_file
506
509
 
507
510
  # remove DB table
508
511
  if test_table_exists?(testID)
@@ -510,13 +513,13 @@ class RosyTrainingTestTable
510
513
  remove_table(testtable_name(testID))
511
514
  end
512
515
  end
513
-
516
+
514
517
  ###
515
518
  def remove_split_table(splitID, # string
516
519
  dataset) # string: train/test
517
520
  # remove ID from log
518
521
  @log_obj.splitIDs.delete(splitID)
519
- to_file()
522
+ to_file
520
523
 
521
524
  # remove DB table
522
525
  if split_table_exists?(splitID, dataset)
@@ -530,7 +533,7 @@ class RosyTrainingTestTable
530
533
  private
531
534
 
532
535
  ###
533
- # returns: string, name of DB column with classification result
536
+ # returns: string, name of DB column with classification result
534
537
  def classifcolumn_name(id)
535
538
  return @addcol_prefix + "_" + id.to_s
536
539
  end
@@ -558,7 +561,7 @@ class RosyTrainingTestTable
558
561
  dir = File.new_dir(@exp.instantiate("rosy_dir",
559
562
  "exp_ID" => @exp.get("experiment_ID")))
560
563
  end
561
-
564
+
562
565
  return dir + "ttt_data.pkl"
563
566
  end
564
567
 
@@ -569,7 +572,7 @@ class RosyTrainingTestTable
569
572
  # returns: an Array of RunLog objects
570
573
  def get_runlogs(table_name) # string: DB table name
571
574
  unless @log_obj.runlogs[table_name]
572
- @log_obj.runlogs[table_name] = Array.new
575
+ @log_obj.runlogs[table_name] = []
573
576
  end
574
577
 
575
578
  return @log_obj.runlogs[table_name]
@@ -581,7 +584,7 @@ class RosyTrainingTestTable
581
584
  # Saves the changed @log_obj to file.
582
585
  def del_runlogs(table_name) # string: DB table name
583
586
  @log_obj.runlogs.delete(table_name)
584
- to_file()
587
+ to_file
585
588
  end
586
589
 
587
590
  ###
@@ -590,7 +593,7 @@ class RosyTrainingTestTable
590
593
  def add_to_runlog(table_name, # string: DB table name
591
594
  runlog)
592
595
  get_runlogs(table_name) << runlog
593
- to_file()
596
+ to_file
594
597
  end
595
598
 
596
599
  ###
@@ -604,7 +607,7 @@ class RosyTrainingTestTable
604
607
  # sanity check: runlog for training data? this can only be the argrec step
605
608
  if dataset == "train" and step and step != "argrec"
606
609
  raise "Shouldn't be here: #{dataset} #{step}"
607
- end
610
+ end
608
611
 
609
612
  if splitID
610
613
  # access runlogs of a split table
@@ -637,7 +640,7 @@ class RosyTrainingTestTable
637
640
 
638
641
  # learner: concatenation of all learners named in the experiment file,
639
642
  # sorted alphabetically.
640
- #
643
+ #
641
644
  # @exp.get_lf("classifier") returns: array of pairs [classifier_name, options[array]]
642
645
  rl.learner = @exp.get_lf("classifier").map { |classif_name, options| classif_name }.sort.join(" ")
643
646
 
@@ -650,7 +653,7 @@ class RosyTrainingTestTable
650
653
  # default: read one frame at a time
651
654
  rl.xwise = "frame"
652
655
  end
653
-
656
+
654
657
  return rl
655
658
  end
656
659
 
@@ -658,16 +661,16 @@ class RosyTrainingTestTable
658
661
  # auxiliary for "new runlog" and "existing runlog"
659
662
  # to avoid double computation
660
663
  #
661
- # get a list of RunLog objects, check against a given
664
+ # get a list of RunLog objects, check against a given
662
665
  # RunLog object
663
666
  #
664
- # returns: runlog object, if found in the given list,
667
+ # returns: runlog object, if found in the given list,
665
668
  # i.e. if all entries except the column name match
666
669
  # and okay == true
667
670
  # else returns nil
668
671
  def existing_runlog_aux(runlogs, # list of RunLog objects
669
672
  runlog) # RunLog object
670
-
673
+
671
674
  runlogs.each { |rl|
672
675
  if rl.step == runlog.step and
673
676
  rl.learner == runlog.learner and
@@ -691,7 +694,7 @@ class RosyTrainingTestTable
691
694
  def encode_model_features(step) # string: train/test
692
695
  # list model features as hash
693
696
  temp = @feature_info.get_model_features(step)
694
- model_features = Hash.new
697
+ model_features = {}
695
698
  temp.each { |feature_name|
696
699
  model_features[feature_name] = true
697
700
  }
@@ -711,7 +714,7 @@ class RosyTrainingTestTable
711
714
  # returns: a list of strings, the model features
712
715
  def decode_model_features(num) # integer: result of encode_model_features
713
716
 
714
- model_features = Array.new
717
+ model_features = []
715
718
  @feature_names.sort.each_with_index { |feature_name, ix|
716
719
  if num[ix] == 1
717
720
  model_features << feature_name
@@ -749,7 +752,7 @@ class RosyTrainingTestTable
749
752
  end
750
753
  header << "of experiment '#{@exp.get("experiment_ID")}'\n\n"
751
754
 
752
- descr = Array.new
755
+ descr = []
753
756
  loglist.each { |rl|
754
757
  unless rl.okay
755
758
  next
@@ -766,9 +769,9 @@ class RosyTrainingTestTable
766
769
  if count % 5 != 0
767
770
  string << ", "
768
771
  end
769
- count += 1
772
+ count += 1
770
773
  string << feature_name
771
- if count % 5 == 0
774
+ if count % 5 == 0
772
775
  string << "\n\t"
773
776
  end
774
777
  }
@@ -777,11 +780,10 @@ class RosyTrainingTestTable
777
780
 
778
781
  return {
779
782
  "table_name" => table_name,
780
- "header" => header,
783
+ "header" => header,
781
784
  "runlist" => descr
782
785
  }
783
786
  end
784
-
785
-
786
-
787
+ end
788
+ end
787
789
  end