RubyGems - shalmaneser-rosy - Versions diffs - 1.2.0.rc4 → 1.2.rc5 - Mend

shalmaneser-rosy 1.2.0.rc4 → 1.2.rc5

Files changed (41) hide show

checksums.yaml +4 -4
data/README.md +47 -18
data/bin/rosy +14 -7
data/lib/rosy/FailedParses.rb +22 -20
data/lib/rosy/FeatureInfo.rb +35 -31
data/lib/rosy/GfInduce.rb +132 -130
data/lib/rosy/GfInduceFeature.rb +86 -68
data/lib/rosy/InputData.rb +59 -55
data/lib/rosy/RosyConfusability.rb +47 -40
data/lib/rosy/RosyEval.rb +55 -55
data/lib/rosy/RosyFeatureExtractors.rb +295 -290
data/lib/rosy/RosyFeaturize.rb +54 -67
data/lib/rosy/RosyInspect.rb +52 -50
data/lib/rosy/RosyIterator.rb +73 -67
data/lib/rosy/RosyPhase2FeatureExtractors.rb +48 -48
data/lib/rosy/RosyPruning.rb +39 -31
data/lib/rosy/RosyServices.rb +116 -115
data/lib/rosy/RosySplit.rb +55 -53
data/lib/rosy/RosyTask.rb +7 -3
data/lib/rosy/RosyTest.rb +174 -191
data/lib/rosy/RosyTrain.rb +46 -50
data/lib/rosy/RosyTrainingTestTable.rb +101 -99
data/lib/rosy/TargetsMostFrequentFrame.rb +13 -9
data/lib/rosy/{AbstractFeatureAndExternal.rb → abstract_feature_extractor.rb} +22 -97
data/lib/rosy/abstract_single_feature_extractor.rb +52 -0
data/lib/rosy/external_feature_extractor.rb +35 -0
data/lib/rosy/opt_parser.rb +231 -201
data/lib/rosy/rosy.rb +63 -64
data/lib/rosy/rosy_conventions.rb +66 -0
data/lib/rosy/rosy_error.rb +15 -0
data/lib/rosy/var_var_restriction.rb +16 -0
data/lib/shalmaneser/rosy.rb +1 -0
metadata +26 -19
data/lib/rosy/ExternalConfigData.rb +0 -58
data/lib/rosy/View.rb +0 -418
data/lib/rosy/rosy_config_data.rb +0 -121
data/test/frprep/test_opt_parser.rb +0 -94
data/test/functional/functional_test_helper.rb +0 -58
data/test/functional/test_fred.rb +0 -47
data/test/functional/test_frprep.rb +0 -99
data/test/functional/test_rosy.rb +0 -40

data/lib/rosy/TargetsMostFrequentFrame.rb CHANGED Viewed

@@ -1,9 +1,11 @@
+module Shalmaneser
+module Rosy
 module TargetsMostFrequentSc
-  def determine_target_most_frequent_sc(view,
-                                        noval,
+  def determine_target_most_frequent_sc(view,
+                                        noval,
                                         with_frame_default = nil)
-    target_subcat = Hash.new()
-    frame_subcat = Hash.new()
+    target_subcat = {}
+    frame_subcat = {}
     view.each_sentence { |sentence|
@@ -35,8 +37,8 @@ module TargetsMostFrequentSc
     } # each sentence of view
     # most frequent subcat for each target:
-    retv = Hash.new()
-    retv2 = Hash.new()
+    retv = {}
+    retv2 = {}
     [[retv, target_subcat], [retv2, frame_subcat]].each { |out_hash, in_hash|
       in_hash.each_pair { |key, subcats|
@@ -53,8 +55,10 @@ module TargetsMostFrequentSc
   end
   def tmf_target_key(instance)
-    return instance["frame"] + "." +
-           instance["target"] + "." +
-           instance["target_pos"]
+    return instance["frame"] + "." +
+           instance["target"] + "." +
+           instance["target_pos"]
   end
 end
+end
+end

data/lib/rosy/{AbstractFeatureAndExternal.rb → abstract_feature_extractor.rb} RENAMED Viewed

@@ -1,11 +1,13 @@
 # Katrin Erk November 05
-#
+#
 # Abstract classes for
 # - Rosy features
 # - Rosy interface for external knowledge sources.
-require 'rosy/ExternalConfigData'
+require 'configuration/external_config_data'
+module Shalmaneser
+module Rosy
 ####
 # Feature Extractor:
 # computes one or more features for a node (a SynNode object) out of
@@ -21,15 +23,15 @@ class AbstractFeatureExtractor
   # returns a string: the designator for this feature extractor
   # (an extractor may compute several features, but
   #  in the experiment file it is chosen by a single designator)
-  def AbstractFeatureExtractor.designator()
+  def self.designator
     raise "Overwrite me"
   end
   ###
-  # returns an array of feature names, the names of the
+  # returns an array of feature names, the names of the
   # features that it can compute.
   # The number of features that the extractor computes must be fixed.
-  def AbstractFeatureExtractor.feature_names()
+  def self.feature_names
     raise "Overwrite me."
   end
@@ -37,12 +39,12 @@ class AbstractFeatureExtractor
   # returns a string: the data type for the feature
   # to be passed on to the MySQL database,
   # e.g. VARCHAR(10), INT
-  def AbstractFeatureExtractor.sql_type()
+  def self.sql_type
     raise "Overwrite me"
   end
   ###
-  # returns a string: the feature type
+  # returns a string: the feature type
   # (the same for all features computed by this extractor)
   # possible values:
   # - gold: gold label
@@ -50,7 +52,7 @@ class AbstractFeatureExtractor
   # - syn: feature computed from syntactic characteristics of the instance
   # - sem: feature involving semantic characteristics of the instance
   # - sentlevel: this feature is the same for all instances of a sentence
-  def AbstractFeatureExtractor.feature_type()
+  def self.feature_type
     raise "Overwrite me"
   end
@@ -59,19 +61,19 @@ class AbstractFeatureExtractor
   # depending on whether the feature is computed
   # directly from the SalsaTigerSentence and the SynNode objects
   # or whether it is computed from the phase 1 features
-  def AbstractFeatureExtractor.phase()
+  def self.phase
     raise "Overwrite me."
   end
   ###
   # returns an array of strings, providing information about
   # the feature extractor
-  def AbstractFeatureExtractor.info()
-    return []
+  def self.info
+    []
   end
   ###
-  # set sentence, set node, set other settings:
+  # set sentence, set node, set other settings:
   # this is done prior to
   # feature computation using compute_feature()
   # such that computations that stay the same for
@@ -84,10 +86,11 @@ class AbstractFeatureExtractor
                                             frame) # FrameNode object
     @@sent = sent
     @@frame = frame
     return true
   end
+  # @todo Rename and change the return value.
   def AbstractFeatureExtractor.set_node(node) # SynNode of the sentence set in set_sentence
     @@node = node
@@ -101,7 +104,7 @@ class AbstractFeatureExtractor
   # several features can be done in advance
   def AbstractFeatureExtractor.set(var_hash = {})
     # no settings at this point
     return true
   end
   # test during initialisation whether a feature is computable
@@ -124,12 +127,12 @@ class AbstractFeatureExtractor
   #
   # returns an array of features (strings), length the same as the
   # length of feature_names()
-  def compute_features()
+  def compute_features
     raise "overwrite me"
   end
   ###
-  # phase 2 extractors:
+  # phase 2 extractors:
   # compute features for a complete view
   #
   # returns: an array of columns,
@@ -139,7 +142,7 @@ class AbstractFeatureExtractor
     raise "overwrite me"
   end
-  # At this place, we had abstract methods for "training" phase 2 features
+  # At this place, we had abstract methods for "training" phase 2 features
   # Since this involves introducing a "state" that is nontrivial to preserve
   # for a standalone version of the classifiers, without keeping the training data,
   # we decided to remove this functionality (30.11.05).
@@ -149,94 +152,16 @@ class AbstractFeatureExtractor
   ######
   protected
-  def AbstractFeatureExtractor.announce_me()
+  def AbstractFeatureExtractor.announce_me
     # AB: In 1.9 constants are symbols.
     if Module.constants.include?("RosyFeatureInfo") or Module.constants.include?(:RosyFeatureInfo)
       # yup, we have a class to which we can announce ourselves
-      RosyFeatureInfo.add_feature(eval(self.name()))
+      RosyFeatureInfo.add_feature(self)
     else
       # no interface collector class
 #      $stderr.puts "Feature #{self.name()} not announced: no RosyFeatureInfo."
     end
   end
 end
-################################################################
-# Wrapper class for extractors that compute a single feature
-class AbstractSingleFeatureExtractor < AbstractFeatureExtractor
-  ###
-  # returns a string: the designator for this feature extractor
-  # (an extractor may compute several features, but
-  #  in the experiment file it is chosen by a single designator)
-  #
-  # here: single feature, and the feature name is the designator
-  def AbstractFeatureExtractor.designator()
-    return eval(self.name()).feature_name()
-  end
-  ###
-  def AbstractSingleFeatureExtractor.feature_names()
-    return [eval(self.name()).feature_name()]
-  end
-  ###
-  def compute_features()
-    return [compute_feature()]
-  end
-  def compute_features_on_view(view) # DBView object
-    return [compute_feature_on_view(view)]
-  end
-  ######
-  # Single-feature methods
-  ###
-  def AbstractSingleFeatureExtractor.feature_name()
-    raise "Overwrite me."
-  end
-  ###
-  def compute_feature()
-    raise "Overwrite me"
-  end
-  ###
-  def compute_feature_on_view(view) # DBView object
-    raise "Overwrite me"
-  end
 end
-######################################################
-class ExternalFeatureExtractor < AbstractFeatureExtractor
-  @@warning_uttered = false
-  ####
-  # initialization:
-  #
-  # read experiment file for external interfaces
-  def initialize(exp,    # RosyConfigData object
-                 interpreter_class)
-    @exp_rosy = exp
-    @@interpreter_class = interpreter_class
-    unless @exp_rosy.get("external_descr_file")
-      unless @@warning_uttered
-	$stderr.puts "Warning: Cannot compute external feature"
-	$stderr.puts "since 'external_descr_file' has not been set"
-	$stderr.puts "in the Rosy experiment file."
-	@@warning_uttered = true
-      end
-      @exp_external = nil
-      return
-    end
-    @exp_external = ExternalConfigData.new(@exp_rosy.get("external_descr_file"))
-  end
 end

data/lib/rosy/abstract_single_feature_extractor.rb ADDED Viewed

@@ -0,0 +1,52 @@
+require_relative 'abstract_feature_extractor'
+module Shalmaneser
+module Rosy
+################################################################
+# Wrapper class for extractors that compute a single feature
+class AbstractSingleFeatureExtractor < AbstractFeatureExtractor
+  ###
+  # returns a string: the designator for this feature extractor
+  # (an extractor may compute several features, but
+  #  in the experiment file it is chosen by a single designator)
+  #
+  # here: single feature, and the feature name is the designator
+  def self.designator
+    feature_name
+  end
+  ###
+  def self.feature_names
+    [feature_name]
+  end
+  ###
+  def compute_features
+    [compute_feature]
+  end
+  def compute_features_on_view(view) # DBView object
+    [compute_feature_on_view(view)]
+  end
+  ######
+  # Single-feature methods
+  ###
+  def self.feature_name
+    raise "Overwrite me."
+  end
+  ###
+  def compute_feature
+    raise "Overwrite me"
+  end
+  ###
+  def compute_feature_on_view(view) # DBView object
+    raise "Overwrite me"
+  end
+end
+end
+end

data/lib/rosy/external_feature_extractor.rb ADDED Viewed

@@ -0,0 +1,35 @@
+require_relative 'abstract_feature_extractor'
+require 'configuration/external_config_data'
+module Shalmaneser
+module Rosy
+class ExternalFeatureExtractor < AbstractFeatureExtractor
+  @@warning_uttered = false
+  ####
+  # initialization:
+  #
+  # read experiment file for external interfaces
+  # @param [RosyConfigData] exp object
+  def initialize(exp, interpreter_class)
+    @exp_rosy = exp
+    @@interpreter_class = interpreter_class
+    unless @exp_rosy.get("external_descr_file")
+      unless @@warning_uttered
+        $stderr.puts "Warning: Cannot compute external feature"
+        $stderr.puts "since 'external_descr_file' has not been set"
+        $stderr.puts "in the Rosy experiment file."
+        @@warning_uttered = true
+      end
+      @exp_external = nil
+      return
+    end
+    @exp_external = Shalmaneser::Configuration::ExternalConfigData.new(@exp_rosy.get("external_descr_file"))
+  end
+end
+end
+end

data/lib/rosy/opt_parser.rb CHANGED Viewed

@@ -1,148 +1,179 @@
 # -*- coding: utf-8 -*-
 require 'getoptlong'
+require 'definitions'
+require 'configuration/rosy_config_data'
+module Shalmaneser
+  module Rosy
+    class OptParser
+      def self.parse(cmd_args)
+        ##############################
+        # main starts here
+        ##############################
+        ##
+        # evaluate runtime arguments
+        tasks = {
+          "featurize" => [
+            # test table ID, required for test, no default
+            ['--testID', '-i', GetoptLong::REQUIRED_ARGUMENT],
+            # set to featurize: 'train' or 'test', no default
+            ['--dataset', '-d', GetoptLong::REQUIRED_ARGUMENT],
+            # splitlog ID: if given, featurize this split. Cannot use both this and -d
+            ['--logID', '-l', GetoptLong::REQUIRED_ARGUMENT],
+            ['--append', '-A', GetoptLong::NO_ARGUMENT]
+          ],
+          "split" => [
+            # splitlog ID, required, no default
+            ['--logID', '-l', GetoptLong::REQUIRED_ARGUMENT],
+            # percentage training data, default: 90
+            ['--trainpercent', '-r', GetoptLong::REQUIRED_ARGUMENT]
+          ],
+          "train" => [
+            # splitlog ID; if given, will train on split rather than all of main table
+            ['--logID', '-l', GetoptLong::REQUIRED_ARGUMENT],
+            # classification step: 'argrec', 'arglab', 'both' (default) or 'onestep'
+            ['--step', '-s', GetoptLong::REQUIRED_ARGUMENT]
+          ],
+          "test" => [
+            # classification step: 'argrec', 'arglab', 'both' (default) or 'onestep'
+            ['--step', '-s', GetoptLong::REQUIRED_ARGUMENT],
+            # test table ID: if given, test on this table
+            ['--testID', '-i', GetoptLong::REQUIRED_ARGUMENT],
+            # splitlog ID: if given, test on this split. Cannot use both this and -i
+            ['--logID', '-l', GetoptLong::REQUIRED_ARGUMENT],
+            # set this to prevent output of disambiguated test data
+            ['--nooutput', '-N', GetoptLong::NO_ARGUMENT]
+          ],
+          "eval" => [
+            # classification step: 'argrec', 'arglab', 'both' (default) or 'onestep'
+            ['--step', '-s', GetoptLong::REQUIRED_ARGUMENT],
+            # test table ID: if given, test on this table
+            ['--testID', '-i', GetoptLong::REQUIRED_ARGUMENT],
+            ['--logID', '-l', GetoptLong::REQUIRED_ARGUMENT]
+          ],
+          "inspect" => [
+            # describe all tables
+            ['--tables', GetoptLong::NO_ARGUMENT],
+            # describe table contents for current experiment
+            ['--tablecont', GetoptLong::OPTIONAL_ARGUMENT],
+            # test table ID: if given, describe contents of this table
+            ['--testID', '-i', GetoptLong::REQUIRED_ARGUMENT],
+            # describe classification runs for current experiment
+            ['--runs', GetoptLong::NO_ARGUMENT],
+            # list sentence IDs for given splitlog
+            ['--split', GetoptLong::REQUIRED_ARGUMENT]
+          ],
+          "services" => [
+            # delete database table
+            ['--deltable', GetoptLong::REQUIRED_ARGUMENT],
+            # delete experiment tables and files
+            ['--delexp', GetoptLong::NO_ARGUMENT],
+            # delete tables interactively
+            ['--deltables', GetoptLong::NO_ARGUMENT],
+            # delete runs
+            ['--delruns', GetoptLong::NO_ARGUMENT],
+            # delete split
+            ['--delsplit', GetoptLong::REQUIRED_ARGUMENT],
+            # dump experiment to files
+            ['--dump', GetoptLong::OPTIONAL_ARGUMENT],
+            # load experiment from files
+            ['--load', GetoptLong::OPTIONAL_ARGUMENT],
+            # write feature files
+            ['--writefeatures', GetoptLong::OPTIONAL_ARGUMENT],
+            # classification step: 'argrec', 'arglab', 'both' (default) or 'onestep'
+            ['--step', '-s', GetoptLong::REQUIRED_ARGUMENT],
+            # test table ID: if given, test on this table
+            ['--testID', '-i', GetoptLong::REQUIRED_ARGUMENT],
+            # splitlog ID: if given, test on this split. Cannot use both this and -i
+            ['--logID', '-l', GetoptLong::REQUIRED_ARGUMENT]
+          ]
+        }
+        optnames = [
+          # get help
+          ['--help', '-h', GetoptLong::NO_ARGUMENT],
+          # experiment file name (and path), no default
+          ['--expfile', '-e', GetoptLong::REQUIRED_ARGUMENT],
+          # task to perform: one of task.keys, no default
+          ['--task', '-t', GetoptLong::REQUIRED_ARGUMENT]
+        ]
+        tasks.values.each { |more_optnames| optnames.concat more_optnames }
+        optnames.uniq!
+        begin
+          opts = GetoptLong.new(*optnames)
+        rescue => e
+          $stderr.puts "Error: unknown command line option: #{e.message}!"
+          exit 1
+        end
-require 'rosy/rosy_config_data'
-module Rosy
-  class OptParser
-    def self.parse(cmd_args)
-      ##############################
-      # main starts here
-      ##############################
-      ##
-      # evaluate runtime arguments
-      tasks = {
-        "featurize" => [ [ '--testID', '-i', GetoptLong::REQUIRED_ARGUMENT],             # test table ID, required for test, no default
-                         [ '--dataset', '-d', GetoptLong::REQUIRED_ARGUMENT],            # set to featurize: 'train' or 'test', no default
-                         ['--logID', '-l', GetoptLong::REQUIRED_ARGUMENT],               # splitlog ID: if given, featurize this split. Cannot use both this and -d
-                         ['--append', '-A', GetoptLong::NO_ARGUMENT]
-                       ],
-        "split" => [ ['--logID', '-l', GetoptLong::REQUIRED_ARGUMENT],   # splitlog ID, required, no default
-                     [ '--trainpercent', '-r', GetoptLong::REQUIRED_ARGUMENT]       # percentage training data, default: 90
-                   ],
-        "train" => [ ['--logID', '-l', GetoptLong::REQUIRED_ARGUMENT],   # splitlog ID; if given, will train on split rather than all of main table
-                     ['--step', '-s', GetoptLong::REQUIRED_ARGUMENT]                # classification step: 'argrec', 'arglab', 'both' (default) or 'onestep'
-                   ],
-        "test" => [ ['--step', '-s', GetoptLong::REQUIRED_ARGUMENT],     # classification step: 'argrec', 'arglab', 'both' (default) or 'onestep'
-                    [ '--testID', '-i', GetoptLong::REQUIRED_ARGUMENT],            # test table ID: if given, test on this table
-                    ['--logID', '-l', GetoptLong::REQUIRED_ARGUMENT],              # splitlog ID: if given, test on this split. Cannot use both this and -i
-                    [ '--nooutput', '-N', GetoptLong::NO_ARGUMENT]                # set this to prevent output of disambiguated test data
-                  ],
-        "eval" => [['--step', '-s', GetoptLong::REQUIRED_ARGUMENT],      # classification step: 'argrec', 'arglab', 'both' (default) or 'onestep'
-                   [ '--testID', '-i', GetoptLong::REQUIRED_ARGUMENT],            # test table ID: if given, test on this table
-                   ['--logID', '-l', GetoptLong::REQUIRED_ARGUMENT]
-                  ],
-        "inspect" => [['--tables', GetoptLong::NO_ARGUMENT],             # describe all tables
-                      [ '--tablecont', GetoptLong::OPTIONAL_ARGUMENT],               # describe table contents for current experiment
-                      [ '--testID', '-i', GetoptLong::REQUIRED_ARGUMENT],            # test table ID: if given, describe contents of this table
-                      [ '--runs', GetoptLong::NO_ARGUMENT],                          # describe classification runs for current experiment
-                      [ '--split', GetoptLong::REQUIRED_ARGUMENT]                    # list sentence IDs for given splitlog
-                     ],
-        "services" => [['--deltable', GetoptLong::REQUIRED_ARGUMENT],    # delete database table
-                       [ '--delexp', GetoptLong::NO_ARGUMENT],                        # delete experiment tables and files
-                       [ '--deltables', GetoptLong::NO_ARGUMENT],                     # delete tables interactively
-                       [ '--delruns', GetoptLong::NO_ARGUMENT],                       # delete runs
-                       [ '--delsplit', GetoptLong::REQUIRED_ARGUMENT],                # delete split
-                       [ '--dump', GetoptLong::OPTIONAL_ARGUMENT],                    # dump experiment to files
-                       [ '--load', GetoptLong::OPTIONAL_ARGUMENT],                    # load experiment from files
-                       [ '--writefeatures', GetoptLong::OPTIONAL_ARGUMENT],           # write feature files
-                       ['--step', '-s', GetoptLong::REQUIRED_ARGUMENT],     # classification step: 'argrec', 'arglab', 'both' (default) or 'onestep'
-                       [ '--testID', '-i', GetoptLong::REQUIRED_ARGUMENT],            # test table ID: if given, test on this table
-                       ['--logID', '-l', GetoptLong::REQUIRED_ARGUMENT]              # splitlog ID: if given, test on this split. Cannot use both this and -i
-                      ]
-      }
-      optnames = [[ '--help', '-h', GetoptLong::NO_ARGUMENT],            # get help
-                  [ '--expfile', '-e', GetoptLong::REQUIRED_ARGUMENT],              # experiment file name (and path), no default
-                  [ '--task', '-t', GetoptLong::REQUIRED_ARGUMENT ]                # task to perform: one of task.keys, no default
-                 ]
-      tasks.values.each { |more_optnames|
-        optnames.concat more_optnames
-      }
-      optnames.uniq!
-      # asterisk: "explode" array into individual parameters
-      begin
-        opts = options_hash(GetoptLong.new(*optnames))
-      rescue
-        $stderr.puts "Error: unknown command line option: " + $!
-        exit 1
-      end
-      experiment_filename = nil
-      ##
-      # are we being asked for help?
-      if opts['--help']
-        help()
-        exit(0)
-      end
-      ##
-      # now find the task
-      task = opts['--task']
-      # sanity checks for task
-      if task.nil?
-        help()
-        exit(0)
-      end
-      unless tasks.keys.include? task
-        $stderr.puts "Sorry, I don't know the task '#{task}'. Do 'ruby rosy.rb -h' for a list of tasks."
-        exit 1
-      end
-      ##
-      # now evaluate the rest of the options
-      opts.each_pair { |opt,arg|
-        case opt
-        when '--help', '--task'
+        experiment_filename = nil
+        opts = options_hash(opts)
+        ##
+        # are we being asked for help?
+        # @todo work with the empty case
+        if opts['--help']
+          help
+          exit(0)
+        end
+        ##
+        # now find the task
+        task = opts['--task']
+        # sanity checks for task
+        if task.nil?
+          help
+          exit(0)
+        end
+        unless tasks.keys.include?(task)
+          $stderr.puts "Sorry, I don't know the task '#{task}'. Do 'rosy -h' for a list of tasks."
+          exit 1
+        end
+        ##
+        # now evaluate the rest of the options
+        opts.each_pair do |opt, arg|
+          case opt
+          when '--help', '--task'
           # we already handled this
-        when '--expfile'
-          experiment_filename = arg
-        else
-          # do we know this option?
-          unless tasks[task].assoc(opt)
-            $stderr.puts "Sorry, I don't know the option " + opt + " for task " + task
-            $stderr.puts "Do 'ruby rosy.rb -h' for a list of tasks and options."
-            exit 1
+          when '--expfile'
+            experiment_filename = arg
+          else
+            # do we know this option?
+            unless tasks[task].assoc(opt)
+              $stderr.puts "Sorry, I don't know the option " + opt + " for task " + task
+              $stderr.puts "Do 'ruby rosy.rb -h' for a list of tasks and options."
+              exit 1
+            end
           end
         end
-      }
-      if experiment_filename.nil?
-        $stderr.puts "I need an experiment file name, option --expfile|-e"
-        exit 1
-      end
-      ##
-      # open config file
-      exp = RosyConfigData.new(experiment_filename)
-      # sanity checks
-      unless exp.get("experiment_ID") =~ /^[A-Za-z0-9_]+$/
-        $stderr.puts "Please choose an experiment ID consisting only of the letters A-Za-z0-9_."
-        exit 1
+        # @todo This case looks unreachable if the parser itself enforces this restriction; confirm before removing the check.
+        if experiment_filename.nil?
+          $stderr.puts "I need an experiment file name, option --expfile|-e"
+          exit 1
+        end
+        ##
+        # open config file
+        exp = ::Shalmaneser::Configuration::RosyConfigData.new(experiment_filename)
+        [exp, opts]
       end
-      # enduser mode?
-      $ENDUSER_MODE = exp.get("enduser_mode")
-      [exp, opts]
-    end
-    private
-    def self.help
-      $stderr.puts "
-ROSY: semantic ROle assignment SYstem Version 0.2
+      private
+      def self.help
+        $stderr.puts "
+ROSY: semantic ROle assignment SYstem, Version #{VERSION}
 Usage:
@@ -150,18 +181,18 @@ ruby rosy.rb --help|-h
   gets you this help text.
-ruby rosy.rb --task|-t featurize --expfile|-e <e>
-             [--dataset|-d <d>]  [--testID|-i <i>]
+ruby rosy.rb --task|-t featurize --expfile|-e <e>
+             [--dataset|-d <d>]  [--testID|-i <i>]
              [--logID|-l <l> ] [--append|-A]
   featurizes input data and stores it in a database.
   Enduser mode: dataset has to be 'test' (preset as default),
     no --append.
-  --expfile <e>   Use <e> as the experiment description and
+  --expfile <e>   Use <e> as the experiment description and
                   configuration file
-  --dataset <d>   Set to featurize: <d> is either 'train'
-                  (put data into main table) or 'test' (put data
+  --dataset <d>   Set to featurize: <d> is either 'train'
+                  (put data into main table) or 'test' (put data
                   into separate test table with ID given using --testID)
                   Use at least one of --logID, --dataset.
@@ -171,15 +202,15 @@ ruby rosy.rb --task|-t featurize --expfile|-e <e>
                   Use at least one of --logID, --dataset.
   --testID <i>    Use <i> as the ID for the table to store the test data.
-                  necessary only with '--dataset test'. default: #{default_test_ID()}.
+                  necessary only with '--dataset test'. default: #{::Rosy.default_test_ID}.
-  --append        Do not overwrite previously computed features
+  --append        Do not overwrite previously computed features
                   for this experiment.
-                  Rather, append the new features
+                  Rather, append the new features
                   to the old featurization files.
                   Default: overwrite
-ruby rosy.rb --task|-t split --expfile|-e <f> --logID|-l <l>
+ruby rosy.rb --task|-t split --expfile|-e <f> --logID|-l <l>
             [--trainpercent|-r <r>]
   produces a new train/test split on the main table of the experiment.
   Not available in enduser mode.
@@ -188,7 +219,7 @@ ruby rosy.rb --task|-t split --expfile|-e <f> --logID|-l <l>
   --logID <l>     Use <l> as the ID for storing this new split
-  --trainpercent <r> Allocate <r> percent of the data as train,
+  --trainpercent <r> Allocate <r> percent of the data as train,
                   and 100-<r> as test
                   default: <r>=90
@@ -200,20 +231,20 @@ ruby rosy.rb --task|-t train --expfile|-e <f> [--step|-s <s>] [--logID|-l <l>]
   --expfile <f>   Use <f> as the experiment description and configuration file
   --step <s>      What kind of classifier(s) to train?
-                  <s>=argrec: argument recognition,
+                  <s>=argrec: argument recognition,
                                 distinguish role from nonrole
-                  <s>=arglab: argument labeling, naming roles,
+                  <s>=arglab: argument labeling, naming roles,
                                 builds on argrec
                   <s>=both:   first argrec, then arglab
                   <s>=onestep: do argument labeling right away without
                                 prior filtering of non-arguments
                   default: both
-  --logID <l>     If given, train on this split of the main table rather than
+  --logID <l>     If given, train on this split of the main table rather than
                   the whole main table
-ruby rosy.rb --task|-t test --expfile|-e <f> [--step|-s <s>]
+ruby rosy.rb --task|-t test --expfile|-e <f> [--step|-s <s>]
              [--logID|-l <l> | --testID|-i <i>] [--nooutput|-N]
   apply classifier(s) on data from a test table, or a main table split
   Enduser mode: only -s both, -s onestep available. Cleanup: Database with
@@ -222,12 +253,12 @@ ruby rosy.rb --task|-t test --expfile|-e <f> [--step|-s <s>]
   --expfile <f>   Use <f> as the experiment description and configuration file
   --step <s>      What kind of classifier(s) to use for testing?
-                  <s>=argrec: argument recognition,
+                  <s>=argrec: argument recognition,
                                 distinguish role from nonrole
-                  <s>=arglab: argument labeling, naming roles,
+                  <s>=arglab: argument labeling, naming roles,
                                 builds on argrec
                   <s>=both:   first argrec, then arglab
-                  <s>=onestep: do argument labeling right away without
+                  <s>=onestep: do argument labeling right away without
                                 prior filtering of non-arguments
                   default: both
   --logID <l>     If given, test on this split of the main table
@@ -235,31 +266,31 @@ ruby rosy.rb --task|-t test --expfile|-e <f> [--step|-s <s>]
   --testID <i>    If given, test on this test table.
                   (Use either this option or -l)
-  --nooutput      Do not produce an output of the disambiguated test data
+  --nooutput      Do not produce an output of the disambiguated test data
                   in SalsaTigerXML format. This is useful if you just want
                   to evaluate the system.
                   Default: output is produced.
-ruby rosy.rb --task|-t eval --expfile|-e <f> [--step|-s <s>]
-             [--logID|-l <l> | --testID|-i <i>
+ruby rosy.rb --task|-t eval --expfile|-e <f> [--step|-s <s>]
+             [--logID|-l <l> | --testID|-i <i>
   evaluate the classification results.
   Not available in enduser mode.
   --expfile <f>   Use <f> as the experiment description and configuration file
   --step <s>      Evaluate results of which classification step?
-                  <s>=argrec: argument recognition,
+                  <s>=argrec: argument recognition,
                                 distinguish role from nonrole
-                  <s>=arglab: argument labeling, naming roles,
+                  <s>=arglab: argument labeling, naming roles,
                                 builds on argrec
                   <s>=both:   first argrec, then arglab
-                  <s>=onestep: do argument labeling right away without
+                  <s>=onestep: do argument labeling right away without
                                 prior filtering of non-arguments
                   default: both
                   Need not be given if --runID is given.
-  --logID <l>     If given, evaluate on the test data from this split of
+  --logID <l>     If given, evaluate on the test data from this split of
                   the main table.
                   (use either this option or -i or -R)
@@ -267,29 +298,29 @@ ruby rosy.rb --task|-t eval --expfile|-e <f> [--step|-s <s>]
                   (Use either this option or -l or -R)
-ruby rosy.rb --task|-t inspect --expfile|-e <f> [--tables] [--runs]
+ruby rosy.rb --task|-t inspect --expfile|-e <f> [--tables] [--runs]
              [--tablecont [N]] [--testID|-i <i>] [--split <l>]
-  inspect system-internal data, both global and pertaining to the current
+  inspect system-internal data, both global and pertaining to the current
   experiment.
-  If no options are chosen, an overview of the current experiment
+  If no options are chosen, an overview of the current experiment
   is given.
-  --expfile <f>   Use <f> as the experiment description and
+  --expfile <f>   Use <f> as the experiment description and
                   configuration file
   --tables        Lists all tables of the DB: table name,column names
-  --tablecont [N|id:N] Lists the training instances (as feature vectors)
+  --tablecont [N|id:N] Lists the training instances (as feature vectors)
                   of the current experiment.
                   If test ID is given, test instances are listed as well.
                   The optional argument may have one of two forms:
-                  - It may be a number N. Then only the N first lines
+                  - It may be a number N. Then only the N first lines
                     of each set are listed.
                   - It may be a pair id:N. Then only the N first lines of
                     the DB table with ID id are listed. To list all lines
                     of a single DB table, use id:
-  --testID <i>    If given, --tablecont also lists the feature vectors for
+  --testID <i>    If given, --tablecont also lists the feature vectors for
                   this test table
   --runs          List all classification runs of the current experiment
@@ -298,29 +329,29 @@ ruby rosy.rb --task|-t inspect --expfile|-e <f> [--tables] [--runs]
 ruby rosy.rb --task|-t services --expfile|-e <f> [--deltable <t>]
              [--delexp] [--dump [<D>]] [--load [<D>]] [--delrun <R>]
-             [--delsplit <l>] [--writefeatures [<D>]]
-             [--step|-s <s>]  [--testID|-i <i>] [--logID|-l <l> ]
+             [--delsplit <l>] [--writefeatures [<D>]]
+             [--step|-s <s>]  [--testID|-i <i>] [--logID|-l <l> ]
   diverse services.
   The --del* services are not available in enduser mode.
   --dump [<D>]    Dump the database tables for the current experiment file.
                   If a directory <D> is given, the tables are written there,
-                  otherwise they are written to
-                  data_dir/<experiment_ID>/tables, where data_dir is the
+                  otherwise they are written to
+                  data_dir/<experiment_ID>/tables, where data_dir is the
                   data directory given in the experiment file.
                   No existing files in the directory are removed.
   --load [<D>]    Construct new database tables from the files in
-                  the directory <D>, if it is given, otherwise from
-                  data_dir/<experiment_id>/tables, where data_dir
+                  the directory <D>, if it is given, otherwise from
+                  data_dir/<experiment_id>/tables, where data_dir
                   is the data directory given in the experiment file.
-                  Warning: Database tables are loaded into the
+                  Warning: Database tables are loaded into the
                   current experiment, the one described in the
                   experiment file. Existing data in tables with
                   the same names is overwritten!
   --deltable <t>  Remove database table <t>
   --deltables     Presents all tables in the database for interactive deletion
   --delexp        Remove the experiment described in the given experiment file,
@@ -337,14 +368,14 @@ ruby rosy.rb --task|-t services --expfile|-e <f> [--deltable <t>]
                   system. If <D> is not given, feature files are written
                   to data_dir/<experiment_id>/your_feature_files/.
-                  Uses the parameters --step, --testID, --logID to
+                  Uses the parameters --step, --testID, --logID to
                   determine which feature files will be written.
   --step <s>      Use with --writefeatures: task for which to write features.
-                  <s>=argrec: argument recognition,
+                  <s>=argrec: argument recognition,
                                 distinguish role from nonrole
                   <s>=arglab: argument labeling, naming roles
-                  <s>=onestep: do argument labeling right away without
+                  <s>=onestep: do argument labeling right away without
                                 prior filtering of non-arguments
                   default: onestep.
@@ -352,28 +383,27 @@ ruby rosy.rb --task|-t services --expfile|-e <f> [--deltable <t>]
                   for the split with ID <l>.
   --testID <i>    Use with --writefeatures: write features
-                  for the test set with ID <i>.
-                  default: #{default_test_ID()}.
+                  for the test set with ID <i>.
+                  default: #{::Shalmaneser::Rosy.default_test_ID}.
 "
+      end
-    end
-    ###
-    # options_hash:
-    #
-    # GetoptLong only allows you to access options via each(),
-    # not individually, and it only allows you to cycle through the options once.
-    # So we re-code the options as a hash
-    def self.options_hash(opts_obj) # GetoptLong object
-      opt_hash = Hash.new
-      opts_obj.each do |opt, arg|
-        opt_hash[opt] = arg
+      ###
+      # options_hash:
+      #
+      # GetoptLong only allows you to access options via each(),
+      # not individually, and it only allows you to cycle through the options once.
+      # So we re-code the options as a hash
+      def self.options_hash(opts_obj) # GetoptLong object
+        opt_hash = {}
+        opts_obj.each do |opt, arg|
+          opt_hash[opt] = arg
+        end
+        opt_hash
       end
-      return opt_hash
-    end
-  end # class OptParser
-end # module Rosy
+    end # class OptParser
+  end # module Rosy
+end