RubyGems - shalmaneser-rosy - Versions diffs - 1.2.0.rc4 → 1.2.rc5 - Mend

shalmaneser-rosy 1.2.0.rc4 → 1.2.rc5

Files changed (41) hide show

checksums.yaml +4 -4
data/README.md +47 -18
data/bin/rosy +14 -7
data/lib/rosy/FailedParses.rb +22 -20
data/lib/rosy/FeatureInfo.rb +35 -31
data/lib/rosy/GfInduce.rb +132 -130
data/lib/rosy/GfInduceFeature.rb +86 -68
data/lib/rosy/InputData.rb +59 -55
data/lib/rosy/RosyConfusability.rb +47 -40
data/lib/rosy/RosyEval.rb +55 -55
data/lib/rosy/RosyFeatureExtractors.rb +295 -290
data/lib/rosy/RosyFeaturize.rb +54 -67
data/lib/rosy/RosyInspect.rb +52 -50
data/lib/rosy/RosyIterator.rb +73 -67
data/lib/rosy/RosyPhase2FeatureExtractors.rb +48 -48
data/lib/rosy/RosyPruning.rb +39 -31
data/lib/rosy/RosyServices.rb +116 -115
data/lib/rosy/RosySplit.rb +55 -53
data/lib/rosy/RosyTask.rb +7 -3
data/lib/rosy/RosyTest.rb +174 -191
data/lib/rosy/RosyTrain.rb +46 -50
data/lib/rosy/RosyTrainingTestTable.rb +101 -99
data/lib/rosy/TargetsMostFrequentFrame.rb +13 -9
data/lib/rosy/{AbstractFeatureAndExternal.rb → abstract_feature_extractor.rb} +22 -97
data/lib/rosy/abstract_single_feature_extractor.rb +52 -0
data/lib/rosy/external_feature_extractor.rb +35 -0
data/lib/rosy/opt_parser.rb +231 -201
data/lib/rosy/rosy.rb +63 -64
data/lib/rosy/rosy_conventions.rb +66 -0
data/lib/rosy/rosy_error.rb +15 -0
data/lib/rosy/var_var_restriction.rb +16 -0
data/lib/shalmaneser/rosy.rb +1 -0
metadata +26 -19
data/lib/rosy/ExternalConfigData.rb +0 -58
data/lib/rosy/View.rb +0 -418
data/lib/rosy/rosy_config_data.rb +0 -121
data/test/frprep/test_opt_parser.rb +0 -94
data/test/functional/functional_test_helper.rb +0 -58
data/test/functional/test_fred.rb +0 -47
data/test/functional/test_frprep.rb +0 -99
data/test/functional/test_rosy.rb +0 -40

data/lib/rosy/RosyTrain.rb CHANGED Viewed

@@ -7,28 +7,22 @@
 # Ruby standard library
 require "tempfile"
 # Rosy packages
 require "rosy/RosyTask"
 require "rosy/RosyTest"
-require "common/RosyConventions"
+require 'rosy/rosy_conventions'
 require "rosy/RosyIterator"
 require "rosy/RosyTrainingTestTable"
-require "rosy/RosyPruning"
-require "common/ML"
-# Frprep packages
-#require "common/prep_config_data"
+# require "rosy/RosyPruning"
+require 'ml/classifier'
+module Shalmaneser
+module Rosy
 class RosyTrain < RosyTask
   def initialize(exp,      # RosyConfigData object: experiment description
-		 opts,     # hash: runtime argument option (string) -> value (string)
-		 ttt_obj)  # RosyTrainingTestTable object
-    #####
-    # In enduser mode, this whole task is unavailable
-    in_enduser_mode_unavailable()
+                 opts,     # hash: runtime argument option (string) -> value (string)
+                 ttt_obj)  # RosyTrainingTestTable object
     ##
     # remember the experiment description
@@ -46,21 +40,21 @@ class RosyTrain < RosyTask
     opts.each { |opt,arg|
       case opt
       when "--step"
-	unless ["argrec", "arglab", "onestep", "both"].include? arg
-	  raise "Classification step must be one of: argrec, arglab, both, onestep. I got: " + arg.to_s
-	end
-	@step = arg
+        unless ["argrec", "arglab", "onestep", "both"].include? arg
+          raise "Classification step must be one of: argrec, arglab, both, onestep. I got: " + arg.to_s
+        end
+        @step = arg
       when "--logID"
         @splitID = arg
       else
-	# this is an option that is okay but has already been read and used by rosy.rb
-      end
+        # this is an option that is okay but has already been read and used by rosy.rb
+      end
     }
     ##
     # check: if this is about a split, do we have it?
     if @splitID
-      unless @ttt_obj.splitIDs().include?(@splitID)
+      unless @ttt_obj.splitIDs.include?(@splitID)
         $stderr.puts "Sorry, I have no data for split ID #{@splitID}."
         exit 0
       end
@@ -80,9 +74,9 @@ class RosyTrain < RosyTask
     #   $stderr.puts "Parameter preproc_descr_file_train has to be a readable file."
     #   exit 1
     # end
-    # preproc_exp = FrPrepConfigData.new(preproc_expname)
+    # preproc_exp = FrappeConfigData.new(preproc_expname)
     # @exp.adjoin(preproc_exp)
     # get_lf returns: array of pairs [classifier_name, options[array]]
     #
@@ -101,7 +95,7 @@ class RosyTrain < RosyTask
     if @splitID
       $stderr.puts "on split dataset #{@splitID}"
     else
-      $stderr.puts "on the complete training dataset"
+      $stderr.puts "on the complete training dataset"
     end
     $stderr.puts "---------"
   end
@@ -110,20 +104,20 @@ class RosyTrain < RosyTask
   # perform
   #
   # run the training step(s) selected by @step ("both" runs argrec, then arglab)
-  def perform()
+  def perform
     if @step == "both"
       # both? then do first argrec, then arglab
       $stderr.puts "Rosy training step argrec"
       @step = "argrec"
-      perform_aux()
+      perform_aux
       $stderr.puts "Rosy training step arglab"
       @step = "arglab"
-      perform_aux()
+      perform_aux
     else
       # not both? then just do one
       $stderr.puts "Rosy training step #{@step}"
-      perform_aux()
+      perform_aux
     end
   end
@@ -133,13 +127,13 @@ class RosyTrain < RosyTask
   # perform_aux: do the actual work of the perform() method
   # moved here because of the possibility of having @step=="both",
   # which makes it necessary to perform two training steps one after the other
-  def perform_aux()
+  def perform_aux
     if @step == "arglab" and not(@exp.get("assume_argrec_perfect"))
       # KE Jan 31, 06: always redo computation of argrec on training data.
       # We have had trouble with leftover runlogs too often
       # i.e. apply argrec classifiers to argrec training data
       $stderr.puts "Rosy: Applying argrec classifiers to argrec training data"
       $stderr.puts "      to produce arglab training input"
@@ -147,10 +141,10 @@ class RosyTrain < RosyTask
                                { "--nooutput" => nil,
                                  "--logID" => @splitID,
                                  "--step" => "argrec"},
-                               @ttt_obj,
+                               @ttt_obj,
                                true) # argrec_apply: see above
-      apply_obj.perform()
+      apply_obj.perform
     end
     # hand all the info to the RosyIterator object
@@ -160,12 +154,12 @@ class RosyTrain < RosyTask
     # RosyIterator will add the appropriate DB column restrictions
     # such that pruned constituents do not enter into training
-    @iterator = RosyIterator.new(@ttt_obj, @exp, "train",
-				 "step" => @step,
-				 "splitID" => @splitID,
+    @iterator = RosyIterator.new(@ttt_obj, @exp, "train",
+                                 "step" => @step,
+                                 "splitID" => @splitID,
                                  "prune" => true)
-    if @iterator.num_groups() == 0
+    if @iterator.num_groups == 0
       # no groups:
       # may have been a problem with pruning.
       $stderr.puts
@@ -178,13 +172,13 @@ class RosyTrain < RosyTask
       $stderr.puts
     end
     ####
     # get the list of relevant features,
-    # remove the feature that describes the unit by which we train,
+    # remove the feature that describes the unit by which we train,
     # since it is going to be constant throughout the training file
-    @features = @ttt_obj.feature_info.get_model_features(@step) -
-                @iterator.get_xwise_column_names()
+    @features = @ttt_obj.feature_info.get_model_features(@step) -
+                @iterator.get_xwise_column_names
     # but add the gold feature
     unless @features.include? "gold"
       @features << "gold"
@@ -192,7 +186,7 @@ class RosyTrain < RosyTask
     ####
     #for each frame/ for each target POS:
-    classif_dir = classifier_directory_name(@exp,@step, @splitID)
+    classif_dir = ::Shalmaneser::Rosy::classifier_directory_name(@exp,@step, @splitID)
     @iterator.each_group { |group_descr_hash, group|
@@ -201,34 +195,36 @@ class RosyTrain < RosyTask
       # get a view: model features, restrict frame/targetPOS to current group
       view = @iterator.get_a_view_for_current_group(@features)
       # make input file for classifiers:
       # one instance per line, comma-separated list of features,
       # last feature is the gold label.
       tf = Tempfile.new("rosy")
       view.each_instance_s { |instance_string|
         # change punctuation to _PUNCT_
         # and change empty space to _
         # because otherwise some classifiers may spit
-        tf.puts prepare_output_for_classifiers(instance_string)
+        tf.puts Rosy::prepare_output_for_classifiers(instance_string)
       }
-      tf.close()
+      tf.close
       # train classifiers
       @classifiers.each { |classifier, classifier_name|
         # if an explicit classifier dir is given, use that one
         output_name = classif_dir + @exp.instantiate("classifier_file",
                                                      "classif" => classifier_name,
                                                      "group" => group.gsub(/ /, "_"))
-        classifier.train(tf.path(), output_name)
+        classifier.train(tf.path, output_name)
       }
       # clean up
       tf.close(true)
-      view.close()
+      view.close
     }
   end
 end
+end
+end

data/lib/rosy/RosyTrainingTestTable.rb CHANGED Viewed

@@ -19,23 +19,26 @@
 # - index matching the training table index column
 # - phase 2 features
 #
-# for all tables, training, test and split, there is
+# for all tables, training, test and split, there is
 # a list of learner application results,
 # i.e. the labels assigned to instances by some learner
 # in some learner application run.
 # For the training table there are classification results for
 # argrec applied to training data.
-# For each split table there are classification results for
+# For each split table there are classification results for
 # the test part of the split.
 # For the test tables there are classification results for the test data.
-# The runlog for each DB table lists the conditions of each run
+# The runlog for each DB table lists the conditions of each run
 # (which model features, argrec/arglab/onestep, etc.)
-require "common/ruby_class_extensions"
+require "ruby_class_extensions"
 require 'db/db_table'
 require "rosy/FeatureInfo"
+require 'rosy/rosy_conventions'
+module Shalmaneser
+module Rosy
 # @note AB: Possibly this file belongs to <lib/db>. Check it!
 ######################
 class RosyTrainingTestTable
@@ -43,7 +46,7 @@ class RosyTrainingTestTable
   ######
   # data structures for this class
-  # TttLog: contains known test IDs, splitIDs, runlogs for this
+  # TttLog: contains known test IDs, splitIDs, runlogs for this
   #         experiment.
   #  testIDs:  Array(string) known test IDs
   #  splitIDs: Array(string) known split IDs
@@ -59,9 +62,9 @@ class RosyTrainingTestTable
   #            an integer: take the list of feature names for this experiment
   #            in alphabetical order, then set a bit to one if the
   #            corresponding feature is in the list of model features
-  #  xwise: string, xwise for this classification run,
-  #            concatenation of the names of one or more
-  #            features (on which groups of instances
+  #  xwise: string, xwise for this classification run,
+  #            concatenation of the names of one or more
+  #            features (on which groups of instances
   #            was the learner trained?)
   #  column: string, name of the DB table column with the results
   #            of this classification run
@@ -74,7 +77,7 @@ class RosyTrainingTestTable
   ###
   def initialize(exp,      # RosyConfigData object
-		 database) # Mysql object
+                 database) # Mysql object
     @exp = exp
     @feature_info = RosyFeatureInfo.new(@exp)
     @database = database
@@ -84,21 +87,21 @@ class RosyTrainingTestTable
     # name prefix of classifier columns
     @addcol_prefix = @exp.get("classif_column_name")
     # name of the main table
-    @maintable_name = @exp.instantiate("main_table_name",
-				       "exp_ID" => @exp.get("experiment_ID"))
+    @maintable_name = @exp.instantiate("main_table_name",
+                                       "exp_ID" => @exp.get("experiment_ID"))
     # list of pairs [name, mysql format] for each feature (string*string)
-    @feature_columns = @feature_info.get_column_formats()
+    @feature_columns = @feature_info.get_column_formats
     # list of feature names (strings)
-    @feature_names = @feature_info.get_column_names()
+    @feature_names = @feature_info.get_column_names
     # make empty columns for classification results:
     # list of pairs [name, mysql format] for each classifier column (string*string)
     @classif_columns = Range.new(0,10).map {|id|
       [
-	classifcolumn_name(id),
-	"VARCHAR(20)"
+        classifcolumn_name(id),
+        "VARCHAR(20)"
       ]
     }
-    # columns for split tables:
+    # columns for split tables:
     # the main table's sentence ID column.
     # later to be added: split index column copying the main table's index column
     @split_columns = @feature_columns.select { |name, type|
@@ -106,15 +109,15 @@ class RosyTrainingTestTable
     }
     ###
-    # start the data structure for keeping lists of
-    # test and split IDs, classification run logs etc.
+    # start the data structure for keeping lists of
+    # test and split IDs, classification run logs etc.
     # test whether there is a pickle file.
     # if so, read it
-    success = from_file()
+    success = from_file
     unless success
       # pickle file couldn't be read
       # initialize to empty object
-      @log_obj = TttLog.new(Array.new, Array.new, Hash.new)
+      @log_obj = TttLog.new([], [], {})
     end
   end
@@ -129,7 +132,7 @@ class RosyTrainingTestTable
       return
     end
     Marshal.dump(@log_obj, file)
-    file.close()
+    file.close
   end
   def from_file(dir = nil)
@@ -139,7 +142,7 @@ class RosyTrainingTestTable
       file = File.new(filename)
       begin
         @log_obj = Marshal.load(file)
-      rescue
+      rescue
         # something went wrong, for example an empty pickle file
         $stderr.puts "ROSY warning: could not read pickle #{filename}, assuming empty."
         return false
@@ -148,7 +151,7 @@ class RosyTrainingTestTable
       if dir
         # load from a different file than the normal one?
         # then save this log to the normal file too
-        to_file()
+        to_file
       end
       return true
@@ -165,10 +168,10 @@ class RosyTrainingTestTable
   def testtable_name(testID)
     # no test ID given? use default
     unless testID
-      testID = default_test_ID()
+      testID = Rosy::default_test_ID
     end
-    return @exp.instantiate("test_table_name",
+    return @exp.instantiate("test_table_name",
                             "exp_ID" => @exp.get("experiment_ID"),
                             "test_ID" => testID)
   end
@@ -182,15 +185,15 @@ class RosyTrainingTestTable
     return "rosy_#{@exp.get("experiment_ID")}_split_#{dataset}_#{splitID}"
   end
-  ###
+  ###
   # returns: test IDs for the current experiment (list of strings)
-  def testIDs()
+  def testIDs
     return @log_obj.testIDs
   end
-  ###
+  ###
   # returns: split IDs for the current experiment (list of strings)
-  def splitIDs()
+  def splitIDs
     return @log_obj.splitIDs
   end
@@ -210,12 +213,12 @@ class RosyTrainingTestTable
     if (rl = existing_runlog_aux(loglist, runlog))
       # runlog already exists
       return rl.column
     else
       # runlog does not exist yet.
       # find the first free column
       existing_cols = loglist.select { |rl| rl.okay }.map { |rl| rl.column }
-      @classif_columns.each { |colname, format|
+      @classif_columns.each { |colname, format|
         unless existing_cols.include? colname
           # found an unused column name:
@@ -231,7 +234,7 @@ class RosyTrainingTestTable
       # So we have to extend the table.
       # First find out the complete list of used column names:
       # all table columns starting with @addcol_prefix
-      used_classif_columns = Hash.new
+      used_classif_columns = {}
       @database.list_column_names(table_name).each { |column_name|
         if column_name =~ /^#{@addcol_prefix}/
           used_classif_columns[column_name] = true
@@ -256,12 +259,12 @@ class RosyTrainingTestTable
         raise e
       end
       puts "Finished adding column at "+Time.now.to_s
       # now use that column
       runlog.column = colname
       add_to_runlog(table_name, runlog)
       return colname
-    end
+    end
   end
   ###
@@ -279,7 +282,7 @@ class RosyTrainingTestTable
       return rl.column
     else
       return nil
-    end
+    end
   end
   ###
@@ -293,13 +296,13 @@ class RosyTrainingTestTable
                      splitID,  # string (splitID) or nil
                      runID)    # string: run ID
     loglist = get_runlogs(proper_table_for_runlog(step, dataset, testID, splitID))
-    rl = loglist.detect { |rl|
+    rl = loglist.detect { |rl|
       rl.column == runID
     }
     if rl
       rl.okay = true
     end
-    to_file()
+    to_file
   end
@@ -309,7 +312,7 @@ class RosyTrainingTestTable
                     column_name) # string: name of the run column
     loglist = get_runlogs(table_name)
     loglist.delete_if { |rl| rl.column == column_name }
-    to_file()
+    to_file
   end
   ###
@@ -318,8 +321,8 @@ class RosyTrainingTestTable
   # for all tables of this experiment
   #
   # If all runlogs are empty, returns "none known"
-  def runlog_to_s()
-    hashes = runlog_to_s_list()
+  def runlog_to_s
+    hashes = runlog_to_s_list
     # join text from hashes into a string, omit tables without runs
     string = ""
@@ -342,43 +345,43 @@ class RosyTrainingTestTable
   ###
   # runlog_to_s_list:
   # returns a list of hashes with keys "table_name", "header", "runlist"
-  # where header is a string describing one of
-  # the DB tables of this experiment,
+  # where header is a string describing one of
+  # the DB tables of this experiment,
   # and runlist is a list of pairs [ column_name, text],
   # where text describes the classification run in the column column_name
-  def runlog_to_s_list()
-    retv = Array.new
+  def runlog_to_s_list
+    retv = []
     # main table
     retv << one_runlog_to_s("train", nil, nil)
     # test tables
-    testIDs().each { |testID|
+    testIDs.each { |testID|
       retv << one_runlog_to_s("test", testID, nil)
     }
     # split tables
-    splitIDs().each { |splitID|
+    splitIDs.each { |splitID|
       ["train", "test"].each { |dataset|
         retv  << one_runlog_to_s(dataset, nil, splitID)
-      }
+      }
     }
     return retv
   end
   #######
   # create new training/test/split table
-  def new_train_table()
+  def new_train_table
     # remove old runlogs, if they exist
     del_runlogs(@maintable_name)
     # make table
     return DBTable.new(@database, @maintable_name,
- 		       "new",
- 		       "col_formats" => @feature_columns + @classif_columns,
- 		       "index_cols" => @feature_info.get_index_columns(),
- 		       "addcol_prefix" => @addcol_prefix)
+                       "new",
+                       "col_formats" => @feature_columns + @classif_columns,
+                       "index_cols" => @feature_info.get_index_columns,
+                       "addcol_prefix" => @addcol_prefix)
   end
   ###
@@ -390,16 +393,16 @@ class RosyTrainingTestTable
     # remember test ID
     unless @log_obj.testIDs.include? testID
       @log_obj.testIDs << testID
-      to_file()
+      to_file
     end
     # make table
     return DBTable.new(@database,
                        testtable_name(testID),
-		       "new",
-		       "col_formats" => @feature_columns + @classif_columns,
-		       "index_cols" => @feature_info.get_index_columns(),
-		       "addcol_prefix" => @addcol_prefix)
+                       "new",
+                       "col_formats" => @feature_columns + @classif_columns,
+                       "index_cols" => @feature_info.get_index_columns,
+                       "addcol_prefix" => @addcol_prefix)
   end
@@ -414,11 +417,11 @@ class RosyTrainingTestTable
     # remember split ID
     unless @log_obj.splitIDs.include? splitID
       @log_obj.splitIDs << splitID
-      to_file()
+      to_file
     end
     # determine the type of the index column
-    maintable = existing_train_table()
+    maintable = existing_train_table
     index_name_and_type = maintable.list_column_formats.assoc(maintable.index_name)
     if index_name_and_type
       split_index_type = index_name_and_type.last
@@ -429,31 +432,31 @@ class RosyTrainingTestTable
     end
     # make table
-    return DBTable.new(@database,
+    return DBTable.new(@database,
                        splittable_name(splitID, dataset),
                        "new",
                        "col_formats" => @split_columns + [[split_index_colname, split_index_type]] + @classif_columns,
-                       "index_cols" => [split_index_colname],
+                       "index_cols" => [split_index_colname],
                        "addcol_prefix" => @addcol_prefix)
   end
   #######
   # open existing training or test table
-  def existing_train_table()
+  def existing_train_table
     return DBTable.new(@database, @maintable_name,
-		       "open",
-		       "col_names" => @feature_names,
-		       "addcol_prefix" => @addcol_prefix)
+                       "open",
+                       "col_names" => @feature_names,
+                       "addcol_prefix" => @addcol_prefix)
   end
   ###
   def existing_test_table(testID = "apply")
     return DBTable.new(@database,
                        testtable_name(testID),
-		       "open",
-		       "col_names" => @feature_names,
-		       "addcol_prefix" => @addcol_prefix)
+                       "open",
+                       "col_names" => @feature_names,
+                       "addcol_prefix" => @addcol_prefix)
   end
   ###
@@ -463,7 +466,7 @@ class RosyTrainingTestTable
     return DBTable.new(@database,
                        splittable_name(splitID, dataset),
-                       "open",
+                       "open",
                        "col_names" => @split_columns.map { |name, type| name} + [split_index_colname],
                        "addcol_prefix" => @addcol_prefix)
   end
@@ -472,26 +475,26 @@ class RosyTrainingTestTable
   # table existence tests
   ###
-  def train_table_exists?()
-    return @database.list_tables().include?(@maintable_name)
+  def train_table_exists?
+    return @database.list_tables.include?(@maintable_name)
   end
   ###
   def test_table_exists?(testID) # string
-    return @database.list_tables().include?(testtable_name(testID))
+    return @database.list_tables.include?(testtable_name(testID))
   end
   ###
   def split_table_exists?(splitID,  # string
                           dataset)  # string: train/test
-    return @database.list_tables().include?(splittable_name(splitID, dataset))
+    return @database.list_tables.include?(splittable_name(splitID, dataset))
   end
   ##################3
   # remove tables
   ###
-  def remove_train_table()
+  def remove_train_table
     if train_table_exists?
       del_runlogs(@maintable_name)
       remove_table(@maintable_name)
@@ -502,7 +505,7 @@ class RosyTrainingTestTable
   def remove_test_table(testID) # string
     # remove ID from log
     @log_obj.testIDs.delete(testID)
-    to_file()
+    to_file
     # remove DB table
     if test_table_exists?(testID)
@@ -510,13 +513,13 @@ class RosyTrainingTestTable
       remove_table(testtable_name(testID))
     end
   end
   ###
   def remove_split_table(splitID, # string
                          dataset) # string: train/test
     # remove ID from log
     @log_obj.splitIDs.delete(splitID)
-    to_file()
+    to_file
     # remove DB table
     if split_table_exists?(splitID, dataset)
@@ -530,7 +533,7 @@ class RosyTrainingTestTable
   private
   ###
-  # returns: string, name of DB column with classification result
+  # returns: string, name of DB column with classification result
   def classifcolumn_name(id)
     return @addcol_prefix + "_" + id.to_s
   end
@@ -558,7 +561,7 @@ class RosyTrainingTestTable
       dir = File.new_dir(@exp.instantiate("rosy_dir",
                                           "exp_ID" => @exp.get("experiment_ID")))
     end
     return dir + "ttt_data.pkl"
   end
@@ -569,7 +572,7 @@ class RosyTrainingTestTable
   # returns: an Array of RunLog objects
   def get_runlogs(table_name) # string: DB table name
     unless @log_obj.runlogs[table_name]
-      @log_obj.runlogs[table_name] = Array.new
+      @log_obj.runlogs[table_name] = []
     end
     return @log_obj.runlogs[table_name]
@@ -581,7 +584,7 @@ class RosyTrainingTestTable
   # Saves the changed @log_obj to file.
   def del_runlogs(table_name) # string: DB table name
     @log_obj.runlogs.delete(table_name)
-    to_file()
+    to_file
   end
   ###
@@ -590,7 +593,7 @@ class RosyTrainingTestTable
   def add_to_runlog(table_name, # string: DB table name
                     runlog)
     get_runlogs(table_name) << runlog
-    to_file()
+    to_file
   end
   ###
@@ -604,7 +607,7 @@ class RosyTrainingTestTable
     # sanity check: runlog for training data? this can only be the argrec step
     if dataset == "train" and step and step != "argrec"
       raise "Shouldn't be here: #{dataset} #{step}"
-    end
+    end
     if splitID
       # access runlogs of a split table
@@ -637,7 +640,7 @@ class RosyTrainingTestTable
     # learner: concatenation of all learners named in the experiment file,
     # sorted alphabetically.
-    #
+    #
     # @exp.get_lf("classifier") returns: array of pairs [classifier_name, options[array]]
     rl.learner = @exp.get_lf("classifier").map { |classif_name, options| classif_name }.sort.join(" ")
@@ -650,7 +653,7 @@ class RosyTrainingTestTable
       # default: read one frame at a time
       rl.xwise = "frame"
     end
     return rl
   end
@@ -658,16 +661,16 @@ class RosyTrainingTestTable
   # auxiliary for "new runlog" and "existing runlog"
   # to avoid double computation
   #
-  # get a list of RunLog objects, check against a given
+  # get a list of RunLog objects, check against a given
   # RunLog object
   #
-  # returns: runlog object, if found in the given list,
+  # returns: runlog object, if found in the given list,
   #   i.e. if all entries except the column name match
   #   and okay == true
   #   else returns nil
   def existing_runlog_aux(runlogs,               # list of RunLog objects
                           runlog)                # RunLog object
     runlogs.each { |rl|
       if rl.step == runlog.step and
           rl.learner == runlog.learner and
@@ -691,7 +694,7 @@ class RosyTrainingTestTable
   def encode_model_features(step) # string: train/test
     # list model features as hash
     temp = @feature_info.get_model_features(step)
-    model_features = Hash.new
+    model_features = {}
     temp.each { |feature_name|
       model_features[feature_name] = true
     }
@@ -711,7 +714,7 @@ class RosyTrainingTestTable
   # returns: a list of strings, the model features
   def decode_model_features(num) # integer: result of encode_model_features
-    model_features = Array.new
+    model_features = []
     @feature_names.sort.each_with_index { |feature_name, ix|
       if num[ix] == 1
         model_features << feature_name
@@ -749,7 +752,7 @@ class RosyTrainingTestTable
     end
     header << "of experiment '#{@exp.get("experiment_ID")}'\n\n"
-    descr = Array.new
+    descr = []
     loglist.each { |rl|
       unless rl.okay
         next
@@ -766,9 +769,9 @@ class RosyTrainingTestTable
         if count % 5 !=  0
           string << ", "
         end
-	count += 1
+        count += 1
         string << feature_name
-	if count % 5 == 0
+        if count % 5 == 0
           string << "\n\t"
         end
       }
@@ -777,11 +780,10 @@ class RosyTrainingTestTable
     return {
       "table_name" => table_name,
-      "header" => header,
+      "header" => header,
       "runlist" => descr
     }
   end
+end
+end
 end