RubyGems - shalmaneser - Versions diffs - 1.2.0.rc4 → 1.2.rc5 - Mend

shalmaneser 1.2.0.rc4 → 1.2.rc5

Files changed (115) hide show

checksums.yaml +4 -4
data/README.md +47 -18
data/bin/shalmaneser +8 -2
data/doc/index.md +1 -0
data/lib/shalmaneser/opt_parser.rb +68 -67
metadata +49 -119
data/bin/fred +0 -16
data/bin/frprep +0 -34
data/bin/rosy +0 -17
data/lib/common/AbstractSynInterface.rb +0 -1229
data/lib/common/Counter.rb +0 -18
data/lib/common/EnduserMode.rb +0 -27
data/lib/common/Eval.rb +0 -480
data/lib/common/FixSynSemMapping.rb +0 -196
data/lib/common/Graph.rb +0 -345
data/lib/common/ISO-8859-1.rb +0 -24
data/lib/common/ML.rb +0 -186
data/lib/common/Mallet.rb +0 -236
data/lib/common/Maxent.rb +0 -229
data/lib/common/Optimise.rb +0 -195
data/lib/common/Parser.rb +0 -213
data/lib/common/RegXML.rb +0 -269
data/lib/common/RosyConventions.rb +0 -171
data/lib/common/STXmlTerminalOrder.rb +0 -194
data/lib/common/SalsaTigerRegXML.rb +0 -2347
data/lib/common/SalsaTigerXMLHelper.rb +0 -99
data/lib/common/SynInterfaces.rb +0 -282
data/lib/common/TabFormat.rb +0 -721
data/lib/common/Tiger.rb +0 -1448
data/lib/common/Timbl.rb +0 -144
data/lib/common/Tree.rb +0 -61
data/lib/common/config_data.rb +0 -470
data/lib/common/config_format_element.rb +0 -220
data/lib/common/headz.rb +0 -338
data/lib/common/option_parser.rb +0 -13
data/lib/common/prep_config_data.rb +0 -62
data/lib/common/prep_helper.rb +0 -1330
data/lib/common/ruby_class_extensions.rb +0 -310
data/lib/db/db_interface.rb +0 -48
data/lib/db/db_mysql.rb +0 -145
data/lib/db/db_sqlite.rb +0 -280
data/lib/db/db_table.rb +0 -239
data/lib/db/db_wrapper.rb +0 -176
data/lib/db/sql_query.rb +0 -243
data/lib/ext/maxent/Classify.class +0 -0
data/lib/ext/maxent/Train.class +0 -0
data/lib/fred/Baseline.rb +0 -150
data/lib/fred/FileZipped.rb +0 -31
data/lib/fred/FredBOWContext.rb +0 -877
data/lib/fred/FredConventions.rb +0 -232
data/lib/fred/FredDetermineTargets.rb +0 -319
data/lib/fred/FredEval.rb +0 -312
data/lib/fred/FredFeatureExtractors.rb +0 -322
data/lib/fred/FredFeatures.rb +0 -1061
data/lib/fred/FredFeaturize.rb +0 -602
data/lib/fred/FredNumTrainingSenses.rb +0 -27
data/lib/fred/FredParameters.rb +0 -402
data/lib/fred/FredSplit.rb +0 -84
data/lib/fred/FredSplitPkg.rb +0 -180
data/lib/fred/FredTest.rb +0 -606
data/lib/fred/FredTrain.rb +0 -144
data/lib/fred/PlotAndREval.rb +0 -480
data/lib/fred/fred.rb +0 -47
data/lib/fred/fred_config_data.rb +0 -185
data/lib/fred/md5.rb +0 -23
data/lib/fred/opt_parser.rb +0 -250
data/lib/frprep/Ampersand.rb +0 -39
data/lib/frprep/CollinsInterface.rb +0 -1165
data/lib/frprep/Counter.rb +0 -18
data/lib/frprep/FNCorpusXML.rb +0 -643
data/lib/frprep/FNDatabase.rb +0 -144
data/lib/frprep/FrameXML.rb +0 -513
data/lib/frprep/Graph.rb +0 -345
data/lib/frprep/MiniparInterface.rb +0 -1388
data/lib/frprep/RegXML.rb +0 -269
data/lib/frprep/STXmlTerminalOrder.rb +0 -194
data/lib/frprep/SleepyInterface.rb +0 -384
data/lib/frprep/TntInterface.rb +0 -44
data/lib/frprep/TreetaggerInterface.rb +0 -327
data/lib/frprep/do_parses.rb +0 -143
data/lib/frprep/frprep.rb +0 -693
data/lib/frprep/interfaces/berkeley_interface.rb +0 -372
data/lib/frprep/interfaces/stanford_interface.rb +0 -353
data/lib/frprep/interpreters/berkeley_interpreter.rb +0 -22
data/lib/frprep/interpreters/stanford_interpreter.rb +0 -22
data/lib/frprep/one_parsed_file.rb +0 -28
data/lib/frprep/opt_parser.rb +0 -94
data/lib/frprep/ruby_class_extensions.rb +0 -310
data/lib/rosy/AbstractFeatureAndExternal.rb +0 -242
data/lib/rosy/ExternalConfigData.rb +0 -58
data/lib/rosy/FailedParses.rb +0 -130
data/lib/rosy/FeatureInfo.rb +0 -242
data/lib/rosy/GfInduce.rb +0 -1115
data/lib/rosy/GfInduceFeature.rb +0 -148
data/lib/rosy/InputData.rb +0 -294
data/lib/rosy/RosyConfusability.rb +0 -338
data/lib/rosy/RosyEval.rb +0 -465
data/lib/rosy/RosyFeatureExtractors.rb +0 -1609
data/lib/rosy/RosyFeaturize.rb +0 -281
data/lib/rosy/RosyInspect.rb +0 -336
data/lib/rosy/RosyIterator.rb +0 -478
data/lib/rosy/RosyPhase2FeatureExtractors.rb +0 -230
data/lib/rosy/RosyPruning.rb +0 -165
data/lib/rosy/RosyServices.rb +0 -744
data/lib/rosy/RosySplit.rb +0 -232
data/lib/rosy/RosyTask.rb +0 -19
data/lib/rosy/RosyTest.rb +0 -829
data/lib/rosy/RosyTrain.rb +0 -234
data/lib/rosy/RosyTrainingTestTable.rb +0 -787
data/lib/rosy/TargetsMostFrequentFrame.rb +0 -60
data/lib/rosy/View.rb +0 -418
data/lib/rosy/opt_parser.rb +0 -379
data/lib/rosy/rosy.rb +0 -78
data/lib/rosy/rosy_config_data.rb +0 -121
data/lib/shalmaneser/version.rb +0 -3

data/lib/rosy/RosyPhase2FeatureExtractors.rb DELETED

@@ -1,230 +0,0 @@
-####
-# ke & sp
-# adapted to new feature extractor class,
-# Collins and Tiger features combined:
-# SP November 2005
-#
-# Feature Extractors for Rosy, Phase 2
-#
-# These are features that are computed on the basis of the Phase 1 feature set
-#
-# This consists of all features which have to know feature values for other nodes
-# (e.g. am I the nearest node to the target?) or similar.
-#
-# Contract: each feature extractor inherits from the RosyPhase2FeatureExtractor class
-#
-# Feature extractors return nil if no feature value could be returned
-# Salsa packages
-require 'rosy/AbstractFeatureAndExternal'
-require 'common/SalsaTigerRegXML'
-# Fred and Rosy packages
-require "common/RosyConventions"
-################################
-# base class for all following feature extractors
-class RosyPhase2FeatureExtractor < AbstractFeatureExtractor
-  ###
-  # we do not overwrite "train" and "refresh" --
-  # this is just for features which have to train external models on aspects of the data
-  ###
-  # returns a string: "phase 1" or "phase 2",
-  # depending on whether the feature is computed
-  # directly from the SalsaTigerSentence and the SynNode objects
-  # or whether it is computed from the phase 1 features
-  # computed for the training set
-  #
-  # Here: all features in this packages are phase 2
-  def RosyPhase2FeatureExtractor.phase()
-    return "phase 2"
-  end
-  ###
-  # returns an array of strings, providing information about
-  # the feature extractor
-  def RosyPhase2FeatureExtractor.info()
-    return super().concat(["rosy"])
-  end
-  ###
-  # set sentence, set node, set general settings: this is done prior to
-  # feature computation using compute_feature_value()
-  # such that computations that stay the same for
-  # several features can be done in advance
-  def RosyPhase2FeatureExtractor.set(var_hash)
-    @@split_nones = var_hash["split_nones"]
-    return true
-  end
-  # check if the current feature is computable, i.e. if all the necessary
-  # Phase 1 features are in the present model..
-  def RosyPhase2FeatureExtractor.is_computable(given_extractor_list)
-    return (eval(self.name()).extractor_list - given_extractor_list).empty?
-  end
-  # this probably has to be done for each feature:
-  # identify sentences and the target, and recombine into a large array
-  def compute_features_on_view(view)
-    result = Array.new(eval(self.class.name()).feature_names.length)
-    result.each_index {|i|
-      result[i] = Array.new
-    }
-    view.each_sentence {|instance_features|
-      sentence_result = compute_features_for_sentence(instance_features)
-      if result.length != sentence_result.length
-        raise "Error: number of features computed for a sentence is wrong!"
-      else
-        result.each_index {|i|
-          if sentence_result[i].length != instance_features.length
-            raise "Error: number of feature values does not match number of sentence instances!"
-          end
-          result[i] += sentence_result[i]
-        }
-      end
-    }
-    return result
-  end
-  private
-  # list of all the Phase 1 extractors that a particular feature extractor presupposes
-  def RosyPhase2FeatureExtractor.extractor_list()
-    return []
-  end
-  # compute the feature values for all instances of one sentence
-  # left to be specified
-  # returns (see AbstractFeatureAndExternal) an array of columns (arrays)
-  # The length of the array corresponds to the number of features
-  def compute_features_for_sentence(instance_features) # array of hashes features -> values
-    raise "Overwrite me"
-  end
-end
-##############################################
-# Individual feature extractors
-##############################################
-####################
-# nearestNode
-#
-# compute whether if my head word is the nearest word to the target,
-# according to some criterion
-class NearestNodeFeature < RosyPhase2FeatureExtractor
-  NearestNodeFeature.announce_me()
-  def NearestNodeFeature.designator()
-    return "nearest_node"
-  end
-  def NearestNodeFeature.feature_names()
-    return ["nearest_pt_path",  # the nearest node with a specific pt_path
-            "neareststring_pt",# the nearest pt (string distance)
-            "nearestpath_pt"]   # the nearest pt (path length) ]
-  end
-  def NearestNodeFeature.sql_type()
-    return "TINYINT"
-  end
-  def NearestNodeFeature.feature_type()
-    return "syn"
-  end
-  #####
-  private
-  def NearestNodeFeature.extractor_list()
-    return ["worddistance","pt_path","pt","path_length"]
-  end
-  def compute_features_for_sentence(instance_features)
-    # for each "interesting" feature, compute a hash map value -> index
-    # also compute a hashmap index -> distance
-    # so we efficiently compute, for each feature value, the index with min distance
-    dist_hash = Hash.new # node id -> word distance
-    pl_hash   = Hash.new # node id -> path length
-    path_hash = Hash.new # path -> node id array
-    pt_hash = Hash.new   # pt -> node id array
-    result = [Array.new(instance_features.length),
-              Array.new(instance_features.length),
-              Array.new(instance_features.length)]
-    instance_features.each_index {|inst_id|
-      instance_hash = instance_features[inst_id]
-      dist_hash[inst_id] = instance_hash["worddistance"]
-      pl_hash[inst_id] = instance_hash["path_length"]
-      # record paths
-      pt_path = instance_hash["pt_path"]
-      unless path_hash.key? pt_path
-        path_hash[pt_path] = Array.new
-      end
-      path_hash[pt_path] << inst_id
-      # record pts
-      pt = instance_hash["pt"]
-      unless pt_hash.key? pt
-        pt_hash[pt] = Array.new
-      end
-      pt_hash[pt] << inst_id
-    }
-    # compute feature value for each instance of each path
-    # nearest-path feature is feature 0 of the extractor.
-    path_hash.each {|path,inst_ids|
-      distances = inst_ids.map {|inst_id| dist_hash[inst_id]}
-        min_dist = distances.min
-        inst_ids.each {|inst_id|
-          distance = dist_hash[inst_id]
-        if distance == min_dist and path != @exp.get("noval")
-          result[0][inst_id] = 1
-        else
-          result[0][inst_id] = 0
-        end
-      }
-    }
-    # nearest-pt (string dist) feature is feature 1 of the extractor
-    pt_hash.each{|pt,inst_ids|
-      distances = inst_ids.map {|inst_id| dist_hash[inst_id]}
-      min_dist = distances.min
-      inst_ids.each {|inst_id|
-        distance = dist_hash[inst_id]
-        if distance == min_dist and pt != @exp.get("noval")
-          result[1][inst_id] = 1
-        else
-          result[1][inst_id] = 0
-        end
-      }
-    }
-    # nearest-pt (path length) feature is feature 2 of the extractor
-    pt_hash.each{|pt,inst_ids|
-      path_lengths = inst_ids.map {|inst_id| pl_hash[inst_id]}
-      min_pl = path_lengths.min
-      inst_ids.each {|inst_id|
-        path_length = pl_hash[inst_id]
-        if path_length == min_pl and pt != @exp.get("noval")
-          result[2][inst_id] = 1
-        else
-          result[2][inst_id] = 0
-        end
-      }
-    }
-    return result
-  end
-end

data/lib/rosy/RosyPruning.rb DELETED

@@ -1,165 +0,0 @@
-######
-# XpPrune
-# Katrin Erk Jan 30, 2006
-#
-# Pruning for Rosy: mark constituents that as likely/unlikely to instantiate
-# a role.
-#
-# Pruning currently available:
-# Both Xue/Palmer original and a modified version for FrameNet
-require "common/ruby_class_extensions"
-require "rosy/RosyFeatureExtractors"
-require "common/RosyConventions"
-require "rosy/rosy_config_data"
-require "rosy/RosyIterator"
-###
-# Pruning, derived from the Xue/Palmer algorithm
-#
-# implemented in the Interpreter Class of each individual parser
-class PruneFeature < RosySingleFeatureExtractor
-  PruneFeature.announce_me()
-  def PruneFeature.feature_name()
-    return "prune"
-  end
-  def PruneFeature.sql_type()
-    return "TINYINT"
-  end
-  def PruneFeature.feature_type()
-    return "syn"
-  end
-  def PruneFeature.info()
-    # additional info: I am an index feature
-    return super().concat(["index"])
-  end
-  ################
-  private
-  def compute_feature_instanceOK()
-    retv = @@interpreter_class.prune?(@@node, @@paths, @@terminals_ordered)
-    if [0, 1].include? retv
-      return retv
-    else
-      return 0
-    end
-  end
-end
-####################
-# HIER changeme
-class TigerPruneFeature < RosySingleFeatureExtractor
-  TigerPruneFeature.announce_me()
-  def TigerPruneFeature.feature_name()
-    return "tiger_prune"
-  end
-  def TigerPruneFeature.sql_type()
-    return "TINYINT"
-  end
-  def TigerPruneFeature.feature_type()
-    return "syn"
-  end
-  def TigerPruneFeature.info()
-    # additional info: I am an index feature
-    return super().concat(["index"])
-  end
-  ################
-  private
-  def compute_feature_instanceOK()
-    if @@changeme_tiger_include.include? @@node
-      return 1
-    else
-      return 0
-    end
-  end
-end
-#######################3
-# Pruning:
-# packaging all methods that will be needed to
-# implement it,
-# given that the xp_prune feature defined above
-# has been computed for each constituent during featurization.
-class Pruning
-  ###
-  # returns true if some kind of pruning has been set in the experiment file
-  #  else false
-  def Pruning.prune?(exp)  # Rosy experiment file object
-    if exp.get("prune")
-      return true
-    else
-      return false
-    end
-  end
-  ###
-  # returns: string, the name of the pruning column
-  #  nil if no pruning has been set
-  def Pruning.colname(exp)
-    if exp.get("prune")
-      return exp.get("prune")
-    else
-      return nil
-    end
-  end
-  ###
-  # make ValueRestriction according to the pruning option set in
-  # the experiment file:
-  #       WHERE <pruning_column_name> = 1
-  # where <pruning_column_name> is the name of one of the
-  # pruning features defined above, the same name that has
-  # been set as the value of the pruning parameter in the experiment file
-  #
-  # return: ValueRestriction object (see RosyConventions)
-  #  If no pruning has been set in the experiment file, returns nil
-  def Pruning.restriction_removing_pruned(exp) # Rosy experiment file object
-    if (method = Pruning.colname(exp))
-      return ValueRestriction.new(method, 1)
-    else
-      return nil
-    end
-  end
-  ###
-  # given the name of a DB table column and an iterator that
-  # iterates over some data,
-  # assuming that the column describes some classifier run results,
-  # choose all rows where the pruning column is 0 (i.e. all instances
-  # that have been pruned away) and set the value of the given column
-  # to noval for them all, marking them as "not assigned any role".
-  def Pruning.integrate_pruning_into_run(run_column, # string: run column name
-                                         iterator,   # RosyIterator object
-                                         exp)        # Rosy experiment file object
-    unless Pruning.prune?(exp)
-      # no pruning activated
-      return
-    end
-    iterator.each_group { |group_descr_hash, group|
-      # get a view of all instances for which prune == 0, i.e. that have been pruned away
-      view = iterator.get_a_view_for_current_group(
-                                                   [run_column],
-                                                   [ValueRestriction.new(Pruning.colname(exp), 0)]
-                                                   )
-      # make a list of column values that are all noval
-      all_noval = Array.new
-      view.each_instance_s { |inst|
-        all_noval << exp.get("noval")
-      }
-      # and set all selected instances to noval
-      view.update_column(run_column, all_noval)
-      view.close()
-    }
-  end
-end

data/lib/rosy/RosyServices.rb DELETED

@@ -1,744 +0,0 @@
-# RosyServices
-# KE May 05
-#
-# One of the main task modules of Rosy:
-# remove database tables and experiments,
-# dump experiment to files and load from files
-require "common/ruby_class_extensions"
-# Rosy packages
-require "common/RosyConventions"
-require "rosy/RosyIterator"
-require "rosy/RosySplit"
-require "rosy/RosyTask"
-require "rosy/RosyTrainingTestTable"
-require "rosy/View"
-# Frprep packages
-require "common/prep_config_data"
-###################################################
-class RosyServices < RosyTask
-  def initialize(exp,      # RosyConfigData object: experiment description
-		 opts,     # hash: runtime argument option (string) -> value (string)
-		 ttt_obj)  # RosyTrainingTestTable object
-    ##
-    # remember the experiment description
-    @exp = exp
-    @ttt_obj = ttt_obj
-    ##
-    # check runtime options
-    @tasks = Array.new
-    # defaults:
-    @step = "onestep"
-    @splitID = nil
-    @testID = default_test_ID()
-    opts.each do |opt,arg|
-      case opt
-      when "--deltable", "--delexp", "--delruns", "--delsplit", "--deltables"
-        #####
-        # In enduser mode, you cannot delete things
-        in_enduser_mode_unavailable()
-	@tasks << [opt, arg]
-      when "--dump", "--load", "--writefeatures"
-	@tasks << [opt, arg]
-      when "--step"
-	unless ["argrec", "arglab", "both", "onestep"].include? arg
-	  raise "Classification step must be one of: argrec, arglab, both, onestep. I got: " + arg.to_s
-	end
-	@step = arg
-      when "--logID"
-        @splitID = arg
-      when "--testID"
-        @testID = arg
-      else
-	# this is an option that is okay but has already been read and used by rosy.rb
-      end
-    end
-    # announce the task
-    $stderr.puts "---------"
-    $stderr.puts "Rosy experiment #{@exp.get("experiment_ID")}: Services."
-    $stderr.puts "---------"
-  end
-  #####
-  # perform
-  #
-  # do each of the inspection tasks set as options
-  def perform()
-    @tasks.each { |opt, arg|
-      case opt
-      when "--deltable"
-        del_table(arg)
-      when "--deltables"
-        del_tables()
-      when "--delexp"
-        del_experiment()
-      when "--delruns"
-        del_runs()
-      when "--delsplit"
-        del_split(arg)
-      when "--dump"
-        dump_experiment(arg)
-      when "--load"
-        load_experiment(arg)
-      when "--writefeatures"
- 	write_features(arg)
-      end
-    }
-  end
-  ################################
-  private
-  #####
-  # del_table
-  #
-  # remove one DB table specified by its name
-  # The method verifies whether the table should be deleted.
-  # If the user gives an answer starting in "y", the table is deleted.
-  def del_table(table_name) # string: name of DB table
-    # check if we have this table
-    unless @ttt_obj.database.list_tables().include? table_name
-      $stderr.puts "Cannot find DB table #{table_name}."
-      return
-    end
-    # really delete?
-    $stderr.print "Really delete DB table #{table_name}? [y/n] "
-    answer = gets().chomp()
-    unless answer =~ /^y/
-      return
-    end
-    begin
-      @ttt_obj.database.drop_table(table_name)
-    rescue
-      $stderr.puts "Error: Removal of #{table_name} failed."
-      return
-    end
-    # done.
-    $stderr.puts "Deleted table #{table_name}."
-  end
-  ######
-  # del_tables
-  #
-  # for all the tables in the database, present their name and size,
-  # and ask if it should be deleted.
-  # this is good for cleaning up!
-  def del_tables()
-    @ttt_obj.database.list_tables().each { |table_name|
-      STDERR.print "Delete table #{table_name} (num. rows #{@ttt_obj.database.num_rows(table_name)})? [y/n] "
-      answer = gets().chomp()
-      if answer =~ /^y/
-        deletion_worked = false
-        begin
-          @ttt_obj.database.drop_table(table_name)
-          deletion_worked = true
-        rescue
-          deletion_worked = false
-        end
-        if deletion_worked
-          STDERR.puts "Table #{name} removed."
-        else
-          $stderr.puts "Error: Removal of #{name} failed."
-        end
-      end
-    }
-  end
-  #####
-  # del_experiment
-  #
-  # remove the experiment described by the experiment file @exp
-  # The method verifies whether the experiment should be deleted.
-  # If the user gives an answer starting in "y", the experiment is deleted.
-  def del_experiment()
-    data_dir = File.new_dir(@exp.instantiate("rosy_dir", "exp_ID" => @exp.get("experiment_ID")))
-    # no data? then don't do anything
-    if not(@ttt_obj.train_table_exists?) and
-        @ttt_obj.testIDs().empty? and
-        @ttt_obj.splitIDs().empty? and
-        Dir[data_dir + "*"].empty?
-      $stderr.puts "No data to delete for experiment #{@exp.get("experiment_ID")}."
-      # we have just made the directory data_dir by calling @exp.new_dir
-      # undo that
-      %x{rmdir #{data_dir}}
-      return
-    end
-    # really delete?
-    $stderr.print "Really delete experiment #{@exp.get("experiment_ID")}? [y/n] "
-    answer = gets().chomp()
-    unless answer =~ /^y/
-      return
-    end
-    # remove main table
-    @ttt_obj.remove_train_table()
-    # remove test tables
-    @ttt_obj.testIDs.each { |testID|
-      @ttt_obj.remove_test_table(testID)
-    }
-    # remove split tables
-    @ttt_obj.splitIDs.each { |splitID|
-      @ttt_obj.remove_split_table(splitID, "train")
-      @ttt_obj.remove_split_table(splitID, "test")
-    }
-    # remove files
-    %x{rm -rf #{data_dir}}
-    # done.
-    $stderr.puts "Deleted experiment #{@exp.get("experiment_ID")}."
-  end
-  ############
-  # del_runs
-  #
-  # interactively remove runs from the current experiment
-  def del_runs()
-    # iterate through all tables and runs
-    @ttt_obj.runlog_to_s_list().each { |table_descr|
-      unless table_descr["runlist"].empty?
-        # print description of the table
-        $stderr.puts table_descr["header"]
-        table_descr["runlist"].each { |run_id, run_descr|
-          $stderr.puts run_descr
-          $stderr.puts "Delete this run? [y/n] "
-          answer = gets().chomp()
-          if answer =~ /^[yY]/
-            @ttt_obj.delete_runlog(table_descr["table_name"], run_id)
-          end
-        }
-      end
-    }
-  end
-  ##############
-  # del_split
-  #
-  # remove the split with the given ID
-  # from the current experiment:
-  # delete split tables, remove from list of test and split tables
-  def del_split(splitID)
-    # does the split exist?
-    unless @ttt_obj.splitIDs.include? splitID
-      $stderr.puts "del_split:"
-      $stderr.puts "Sorry, I don't have a split with ID #{splitID} in experiment #{exp.get("experiment_ID")}."
-      return
-    end
-    # really delete?
-    $stderr.print "Really delete split #{splitID} of experiment #{@exp.get("experiment_ID")}? [y/n] "
-    answer = gets().chomp()
-    unless answer =~ /^y/
-      return
-    end
-    # remove split tables
-    @ttt_obj.remove_split_table(splitID, "train")
-    @ttt_obj.remove_split_table(splitID, "test")
-    # remove classifiers for split
-    ["argrec", "arglab", "onestep"].each { |step|
-      classif_dir = classifier_directory_name(@exp,step, splitID)
-      %x{rm -rf #{classif_dir}}
-    }
-  end
-  ##############
-  # write features to files:
-  # use
-  #  @step, @testID, @splitID to determine feature set to write
-  def write_features(directory) # string: directory to write to, may be nil
-    ###
-    # prepare directory to write to
-    if directory != ""
-      # the user has given a directory.
-      # make sure it ends in /
-      dir = File.new_dir(directory)
-    else
-      # use the default directory: <rosy_dir>/tables
-      dir = File.new_dir(@exp.instantiate("rosy_dir",
-                                          "exp_ID" => @exp.get("experiment_ID")),
-                         "your_feature_files")
-    end
-    $stderr.puts "Writing feature files to directory " + dir
-    ##
-    # check: if this is about a split, do we have it?
-    if @splitID
-      unless @ttt_obj.splitIDs().include?(@splitID)
-        $stderr.puts "Sorry, I have no data for split ID #{@splitID}."
-        exit 1
-      end
-    end
-    ##
-    # inform the user on what we are writing
-    if @splitID
-      $stderr.puts "Writing data according to split '#{@splitID}'"
-    elsif @testID
-      # do we have this test set? else write only training set
-      if @ttt_obj.testIDs().include?(@testID)
-	$stderr.puts "Writing training data, and test data with ID '#{@testID}'"
-      else
-        $stderr.puts "Warning: no data for test ID '#{@testID}', writing only training data."
-	@testID = nil
-      end
-    end
-    $stderr.puts "Writing data for classification step '#{@step}'."
-    $stderr.puts
-    ##
-    # write training data
-    $stderr.puts "Writing training sets"
-    iterator = RosyIterator.new(@ttt_obj, @exp, "train",
-				"step" => @step,
-				"splitID" => @splitID,
-				"prune" => true)
-    # get the list of relevant features,
-    # remove the features that describe the unit by which we train,
-    # since they are going to be constant throughout the training file
-    features = @ttt_obj.feature_info.get_model_features(@step) -
-      iterator.get_xwise_column_names()
-    # but add the gold feature
-    unless features.include? "gold"
-      features << "gold"
-    end
-    write_features_aux(dir, "training", @step, iterator, features)
-    ##
-    # write test data
-    if @testID
-      $stderr.puts "Writing test sets"
-      filename = dir + "test.data"
-      iterator = RosyIterator.new(@ttt_obj, @exp, "test",
-                                  "step" => @step,
-                                  "testID" => @testID,
-                                  "splitID" => @splitID,
-                                  "prune" => true)
-      write_features_aux(dir, "test", @step, iterator, features)
-    end
-  end
-  ########
-  # write_features_aux: actually do the writing
-  def write_features_aux(dir,      # string: directory to write to
-			 dataset,  # string: training or test
-			 step,     # string: argrec, arglab, onestep
-			 iterator, # RosyIterator tuned to what we're writing
-			 features) # array:string: list of features to include in views
-    # proceed one group at a time
-    iterator.each_group { |group_descr_hash, group|
-      # get data for this group
-      view = iterator.get_a_view_for_current_group(features)
-      #filename: e.g. directory/training.Statement.data
-      filename = dir + dataset + "." +
-	step + "." +
-	group.gsub(/\s/, "_") + ".data"
-      begin
-	file = File.new(filename, "w")
-      rescue
-	$stderr.puts "Error: Could not write to file #{filename}, exiting."
-	exit 1
-      end
-      view.each_instance_s { |instance_string|
-	# change punctuation to _PUNCT_
-	# and change empty space to _
-	# because otherwise some classifiers may spit
-	file.puts prepare_output_for_classifiers(instance_string)
-      }
-      file.close()
-      view.close()
-    }
-  end
-  ##############3
-  # dump_experiment
-  #
-  # dump to file:
-  # - main table. filename: main
-  # - test tables. filename: test.<testID>
-  # - split tables. filenames: split.train.<ID>, split.test.<ID>
-  # of the experiment given in @exp.
-  #
-  # Each table is dumped in a separate file:
-  # The first line describes column names,
-  # each following line is one row of the DB.
-  #
-  # Files are written to <rosy_dir>/tables
-  def dump_experiment(directory) #string: directory to write to, may be nil
-    ###
-    # prepare:
-    # directory to write to
-    if directory != ""
-      # the user has given a directory.
-      # make sure it ends in /
-      dir = File.new_dir(directory)
-    else
-      # use the default directory: <rosy_dir>/tables
-      dir = File.new_dir(@exp.instantiate("rosy_dir",
-                                          "exp_ID" => @exp.get("experiment_ID")),
-                         "tables")
-    end
-    $stderr.puts "Writing experiment data to directory " + dir
-    ###
-    # dump main table
-    $stderr.puts "Dumping main table"
-    filename = dir + "main"
-    begin
-      file = File.new(filename, "w")
-    rescue
-      $stderr.puts "Sorry, couldn't write to #{filename}"
-      return
-    end
-    if @ttt_obj.train_table_exists?
-      iterator = RosyIterator.new(@ttt_obj, @exp, "train", "xwise" => "frame")
-      table_obj = @ttt_obj.existing_train_table()
-      aux_dump(iterator, file, table_obj)
-    end
-    ###
-    # dump test tables
-    unless @ttt_obj.testIDs.empty?
-      $stderr.print "Dumping test tables: "
-    end
-    @ttt_obj.testIDs.each { |testID|
-      filename = dir + "test." + testID
-      $stderr.print filename, " "
-      begin
-        file = File.new(filename, "w")
-      rescue
-        $stderr.puts "Sorry, couldn't write to #{filename}"
-        return
-      end
-      if @ttt_obj.test_table_exists?(testID)
-        iterator = RosyIterator.new(@ttt_obj, @exp, "test", "testID" => testID, "xwise" => "frame")
-        table_obj = @ttt_obj.existing_test_table(testID)
-        aux_dump(iterator, file, table_obj)
-      end
-    }
-    unless @ttt_obj.testIDs.empty?
-      $stderr.puts
-    end
-    # dump split tables
-    unless @ttt_obj.splitIDs.empty?
-      $stderr.print "Dumping split tables: "
-    end
-    @ttt_obj.splitIDs.each { |splitID|
-      ["train", "test"].each { |dataset|
-        filename = dir + "split." + dataset + "." + splitID
-        $stderr.print filename, " "
-        begin
-          file = File.new(filename, "w")
-        rescue
-          $stderr.puts "Sorry, couldn't write to #{filename}"
-          return
-        end
-        if @ttt_obj.split_table_exists?(splitID, dataset)
-          iterator = RosyIterator.new(@ttt_obj, @exp, dataset, "splitID" => splitID, "xwise" => "frame")
-          table_obj = @ttt_obj.existing_split_table(splitID, dataset, RosySplit.split_index_colname())
-          aux_dump(iterator, file, table_obj)
-        end
-      }
-    }
-    unless @ttt_obj.splitIDs.empty?
-      $stderr.puts
-    end
-    ###
-    # dump classification run logs
-    @ttt_obj.to_file(dir)
-  end
-  ################3
-  # aux_dump
-  #
-  # auxiliary method for dump_experiment()
-  def aux_dump(iterator, # RosyIterator object, refers to table to write
-               file, # stream: write to this file
-               table_obj) # DB table to be written
-    # write all columns except the autoincrement index
-    # columns_to_write: array:string*string column name, column SQL type
-    columns_to_write = Array.new()
-    @ttt_obj.database.list_column_formats(table_obj.table_name).each { |column_name, column_type|
-      unless column_name == table_obj.index_name
-        # check: when loading we make assumptions on the field types that can happen.
-        # check here that we don't get any unexpected field types
-        case column_type
-        when /^varchar\d*\(\d+\)$/i, /^char\d*\(\d+\)$/i, /^tinyint(\(\d+\))*$/i, /^int/i
-        else
-          $stderr.puts "Problem with SQL type #{column_type} of column #{column_name}:"
-          $stderr.puts "Won't be able to handle it when loading."
-        end
-        columns_to_write << [column_name, column_type]
-      end
-    }
-    columns_as_array = columns_to_write.map { |name, type| name}
-    # write column names and types
-    file.puts columns_to_write.map { |name, type| name }.join(",")
-    file.puts columns_to_write.map { |name, type| type }.join(",")
-    # access groups and write data
-    iterator.each_group { |hash, framename|
-      view = iterator.get_a_view_for_current_group(columns_as_array)
-      # write instances
-      view.each_hash { |instance|
-        file.puts columns_to_write.map { |name, type|
-          # get column entries in order of column names
-          instance[name]
-        }.map { |entry|
-          # remove commas
-          entry.to_s.gsub(/,/, "COMMA")
-        }.join(",")
-      }
-      view.close()
-    }
-  end
-  ##############3
-  # load_experiment
-  #
-  # load from file:
-  # - main table
-  # - test tables
-  # - split tables
-  #
-  # Filenames: see dump_experiment()
-  #
-  # Data is loaded into the current experiment,
-  # previous experiment data is removed
-  #
-  # Each table is loaded from a separate file:
-  # The first line lists the column names,
-  # the second line the SQL types of the columns,
-  # each following line is one row of the DB.
-  def load_experiment(directory) # string: directory to read from, may be nil
-    # Reads dumped tables (files "main", "test.<ID>", "split.<train|test>.<ID>")
-    # from a directory into new DB tables of the current experiment, then
-    # reads the classification run logs via @ttt_obj.from_file.
-    #
-    # directory: nil and "" both select the default <rosy_dir>/tables.
-    # Returns without doing anything unless the user confirms with "y".
-    # Exits with status 1 if the preprocessing experiment file is not set
-    # or not readable.
-    ###
-    # ask whether this is what the user intended
-    $stderr.puts "Load experiment data from files into the current experiment:"
-    $stderr.puts "This will overwrite existing data of experiment #{@exp.get("experiment_ID")}."
-    $stderr.print "Proceed? [y/n] "
-    # read the answer from $stdin explicitly: Kernel#gets would read from
-    # files named in ARGV if any are present. End of input counts as "no".
-    answer = $stdin.gets.to_s.chomp
-    return unless answer =~ /^y/
-    ##
-    # adjoin preprocessing experiment file to find out about the language of the data
-    # for this it is irrelevant whether we take the training or test
-    # preprocessing experiment file. Take the training file.
-    preproc_expname = @exp.get("preproc_descr_file_train")
-    if not(preproc_expname)
-      $stderr.puts "Please set the name of the preprocessing exp. file name"
-      $stderr.puts "in the experiment file, parameter preproc_descr_file_train."
-      exit 1
-    elsif not(File.readable?(preproc_expname))
-      $stderr.puts "Error in the experiment file:"
-      $stderr.puts "Parameter preproc_descr_file_train has to be a readable file."
-      exit 1
-    end
-    preproc_exp = FrPrepConfigData.new(preproc_expname)
-    @exp.adjoin(preproc_exp)
-    ###
-    # read the data where?
-    if directory && directory != ""
-      # the user has given a directory
-      # make sure it exists
-      dir = File.existing_dir(directory)
-    else
-      # no directory given (nil or empty string)
-      # default: <rosy_dir>/tables
-      dir = File.existing_dir(@exp.instantiate("rosy_dir",
-                                               "exp_ID" => @exp.get("experiment_ID")),
-                              "tables")
-    end
-    $stderr.puts "Reading experiment data from directory " + dir
-    ###
-    # read tables
-    Dir.foreach(dir) { |filename|
-      # File.join yields the same path whether or not dir ends in a separator
-      path = File.join(dir, filename)
-      case filename
-      when "main"
-        # read main file
-        $stderr.puts "Writing main DB table"
-        # block form of File.open closes the file even if loading raises
-        File.open(path) { |file|
-          col_names, col_types = aux_read_colnames(file, @ttt_obj.feature_names)
-          # start new main table, removing the old
-          table_obj = @ttt_obj.new_train_table()
-          # write file contents to the DB table
-          aux_transfer_to_table(file, table_obj, col_names, col_types)
-        }
-      when /^test\.(.+)$/
-        # read test file
-        testID = $1
-        $stderr.puts "Writing test DB table with ID #{testID}"
-        File.open(path) { |file|
-          col_names, col_types = aux_read_colnames(file, @ttt_obj.feature_names)
-          # start new test table, removing the old
-          table_obj = @ttt_obj.new_test_table(testID)
-          # write file contents to the DB table
-          aux_transfer_to_table(file, table_obj, col_names, col_types)
-        }
-      when /^split\.(train|test)\.(.+)$/
-        dataset = $1
-        splitID = $2
-        $stderr.puts "Writing split #{dataset} DB table with ID #{splitID}"
-        File.open(path) { |file|
-          # split tables: no check of column names against the experiment file
-          col_names, col_types = aux_read_colnames(file, nil)
-          table_obj = @ttt_obj.new_split_table(splitID, dataset, RosySplit.split_index_colname())
-          # write file contents to the DB table
-          aux_transfer_to_table(file, table_obj, col_names, col_types)
-        }
-      else
-        # not a filename we recognize
-        # don't do anything with it
-      end
-    }
-    success = @ttt_obj.from_file(dir)
-    unless success
-      $stderr.puts "Could not read previous classification runs, assume empty."
-    end
-  end
-  ##
-  # aux_read_colnames
-  #
-  # auxiliary method for load_experiment
-  #
-  # read column names from dumped DB table file,
-  # compare to given set of column names,
-  # complain if they don't match
-  #
-  # returns: array*array, first array(strings): column names
-  #   second array(strings): column SQL types
-  def aux_read_colnames(file, # stream: file to read DB table info from
-                        exp_colnames) # array:string, column names defined in the experiment file
-    colnames = aux_read_columns(file)
-    # sanity check: features here the same as in the experiment file?
-    if exp_colnames
-      feature_colnames = colnames.select { |c| c !~ /^#{@exp.get("classif_column_name")}/ }
-      unless feature_colnames.sort() == exp_colnames.sort()
-        raise "Feature name mismatch!\nIn the experiment file, you have specified:\n" +
-            exp_colnames.sort().join(",") +
-            "\nIn the table I'm reading from file I got:\n" +
-            feature_colnames.sort().join(",")
-      end
-    else
-      # no check of column name match requested
-    end
-    coltypes = aux_read_columns(file)
-    return [colnames, coltypes]
-  end
-  ##
-  # aux_read_columns
-  #
-  # auxiliary method for load_experiment:
-  # read a line from file, split it at commas
-  #   to arrive at the contents
-  def aux_read_columns(file) # stream: file
-    line = file.gets()
-    if line.nil?
-      return nil
-    end
-    line.chomp!
-    return line.split(",")
-  end
-  ###
-  # aux_transfer_to_table
-  #
-  # auxiliary method for load_experiment:
-  # read columns from file,
-  # write to table, omitting nil values
-  def aux_transfer_to_table(file, # stream: read from this file
-                            table_obj, # DBTable object: write to this table
-                            col_names, # array:string: these are the column names
-                            col_types) # array:string: SQL column types
-    # sp workaround Tue Aug 23
-    # table may have too few classification columns since it has been created with only
-    # the standard set of classification columns. Add more if needed
-    col_names.each {|col_name|
-      if !(table_obj.list_column_names.include? col_name) and col_name =~ /^#{@exp.get("classif_column_name")}/
-        table_obj.change_format_add_columns([[col_name, "VARCHAR(20)"]])
-      end
-    }
-    # write file contents to the DB table
-    names_and_values = Array.new
-    while row =  aux_read_columns(file)
-      names_and_values.clear()
-      col_names.each_with_index { |name, ix|
-        unless row[ix].nil?
-          if col_types[ix] =~ /^(TINYINT|tinyint)/
-            # integer value: map!
-            names_and_values << [name, row[ix].to_i]
-          else
-            # string value: leave as is
-            names_and_values << [name, row[ix]]
-          end
-        end
-      }
-      table_obj.insert_row(names_and_values)
-    end
-  end
- end