shalmaneser-rosy 1.2.0.rc4 → 1.2.rc5

Files changed (41)
  1. checksums.yaml +4 -4
  2. data/README.md +47 -18
  3. data/bin/rosy +14 -7
  4. data/lib/rosy/FailedParses.rb +22 -20
  5. data/lib/rosy/FeatureInfo.rb +35 -31
  6. data/lib/rosy/GfInduce.rb +132 -130
  7. data/lib/rosy/GfInduceFeature.rb +86 -68
  8. data/lib/rosy/InputData.rb +59 -55
  9. data/lib/rosy/RosyConfusability.rb +47 -40
  10. data/lib/rosy/RosyEval.rb +55 -55
  11. data/lib/rosy/RosyFeatureExtractors.rb +295 -290
  12. data/lib/rosy/RosyFeaturize.rb +54 -67
  13. data/lib/rosy/RosyInspect.rb +52 -50
  14. data/lib/rosy/RosyIterator.rb +73 -67
  15. data/lib/rosy/RosyPhase2FeatureExtractors.rb +48 -48
  16. data/lib/rosy/RosyPruning.rb +39 -31
  17. data/lib/rosy/RosyServices.rb +116 -115
  18. data/lib/rosy/RosySplit.rb +55 -53
  19. data/lib/rosy/RosyTask.rb +7 -3
  20. data/lib/rosy/RosyTest.rb +174 -191
  21. data/lib/rosy/RosyTrain.rb +46 -50
  22. data/lib/rosy/RosyTrainingTestTable.rb +101 -99
  23. data/lib/rosy/TargetsMostFrequentFrame.rb +13 -9
  24. data/lib/rosy/{AbstractFeatureAndExternal.rb → abstract_feature_extractor.rb} +22 -97
  25. data/lib/rosy/abstract_single_feature_extractor.rb +52 -0
  26. data/lib/rosy/external_feature_extractor.rb +35 -0
  27. data/lib/rosy/opt_parser.rb +231 -201
  28. data/lib/rosy/rosy.rb +63 -64
  29. data/lib/rosy/rosy_conventions.rb +66 -0
  30. data/lib/rosy/rosy_error.rb +15 -0
  31. data/lib/rosy/var_var_restriction.rb +16 -0
  32. data/lib/shalmaneser/rosy.rb +1 -0
  33. metadata +26 -19
  34. data/lib/rosy/ExternalConfigData.rb +0 -58
  35. data/lib/rosy/View.rb +0 -418
  36. data/lib/rosy/rosy_config_data.rb +0 -121
  37. data/test/frprep/test_opt_parser.rb +0 -94
  38. data/test/functional/functional_test_helper.rb +0 -58
  39. data/test/functional/test_fred.rb +0 -47
  40. data/test/functional/test_frprep.rb +0 -99
  41. data/test/functional/test_rosy.rb +0 -40
@@ -4,7 +4,7 @@
  # One of the main task modules of Rosy:
  # split training data into training and test parts
  #
- # A split is realized as two DB tables,
+ # A split is realized as two DB tables,
  # one with the sentence IDs of the training part of the split,
  # and one with the sentence IDs of the test part of the split.
  #
@@ -13,30 +13,28 @@
  # Phase 2 features are trained on training features and applied to
  # test features. They need to be retrained for each split.

- require "common/ruby_class_extensions"
+ require "ruby_class_extensions"

  # Frprep packages
- require "common/prep_config_data"
+ require 'configuration/frappe_config_data'

  # Rosy packages
  require "rosy/FailedParses"
- require "rosy/FeatureInfo"
- require "common/RosyConventions"
+ # require "rosy/FeatureInfo"
+ # require "RosyConventions"
+ require 'rosy/var_var_restriction'
  require "rosy/RosyIterator"
  require "rosy/RosyTask"
- require "rosy/RosyTrainingTestTable"
- require "rosy/View"
+ # require "rosy/RosyTrainingTestTable"
+ # require "rosy/View"

+ module Shalmaneser
+ module Rosy
  class RosySplit < RosyTask

  def initialize(exp, # RosyConfigData object: experiment description
- opts, # hash: runtime argument option (string) -> value (string)
- ttt_obj) # RosyTrainingTestTable object
-
- #####
- # In enduser mode, this whole task is unavailable
- in_enduser_mode_unavailable()
-
+ opts, # hash: runtime argument option (string) -> value (string)
+ ttt_obj) # RosyTrainingTestTable object
  ##
  # remember the experiment description

@@ -58,8 +56,8 @@ class RosySplit < RosyTask
  when "--logID"
  @splitID = arg
  else
- # this is an option that is okay but has already been read and used by rosy.rb
- end
+ # this is an option that is okay but has already been read and used by rosy.rb
+ end
  end

  #sanity checks
@@ -82,7 +80,9 @@ class RosySplit < RosyTask
  $stderr.puts "Parameter preproc_descr_file_train has to be a readable file."
  exit 1
  end
- preproc_exp = FrPrepConfigData.new(preproc_filename)
+
+ # @todo Add features for Rosy and delete this dependency.
+ preproc_exp = ::Shalmaneser::Configuration::FrappeConfigData.new(preproc_filename)
  @exp.adjoin(preproc_exp)

  # announce the task
@@ -94,34 +94,34 @@ class RosySplit < RosyTask
  #####
  # perform
  #
- # perform a split of the training data and the "failed sentences" object
- # the split is written to a DB table, the failed sentence splits are written to files
- def perform()
+ # perform a split of the training data and the "failed sentences" object
+ # the split is written to a DB table, the failed sentence splits are written to files
+ def perform

  #################################
  # 1. treat the failed sentences
- perform_failed_parses()
-
+ perform_failed_parses
+
  ###############################
  # 2. get the main table, split it, and write the result to two new tables
- perform_make_split()
+ perform_make_split

  ###############################
  # 3. Repeat the training and extraction of phase 2 features for this split,
  # and write the result to the split tables

  end
-
+
  #######
  # split index column name
- def RosySplit.split_index_colname()
+ def RosySplit.split_index_colname
  return "split_index"
  end

  ############
  # make_join_restriction
  #
- # Given a splitID, the main table to be split,
+ # Given a splitID, the main table to be split,
  # the dataset (train or test), and the experiment file object,
  # make a ValueRestriction object that can be passed to a view initialization:
  #
@@ -130,13 +130,13 @@ class RosySplit < RosyTask
  #
  # returns: VarVarRestriction object
  def RosySplit.make_join_restriction(splitID, # string: splitlogID
- table, # DBtable object
- dataset, # string: "train", "test"
- ttt_obj) # RosyTrainingTestTable object
+ table, # DBtable object
+ dataset, # string: "train", "test"
+ ttt_obj) # RosyTrainingTestTable object

  return VarVarRestriction.new(table.table_name + "." + table.index_name,
- ttt_obj.splittable_name(splitID, dataset) + "." + RosySplit.split_index_colname())
-
+ ttt_obj.splittable_name(splitID, dataset) + "." + RosySplit.split_index_colname)
+
  end

  ###########
@@ -149,34 +149,34 @@ class RosySplit < RosyTask
  # that splits the sentences with failed parses
  # into a training and a test part
  # and remembers this split
- def perform_failed_parses()
- # read file with failed parses
- failed_parses_filename =
+ def perform_failed_parses
+ # read file with failed parses
+ failed_parses_filename =
  File.new_filename(@exp.instantiate("rosy_dir",
  "exp_ID" => @exp.get("experiment_ID")),
  @exp.instantiate("failed_file",
  "exp_ID" => @exp.get("experiment_ID"),
  "split_ID" => "none",
  "dataset" => "none"))
-

- fp_obj = FailedParses.new()
+
+ fp_obj = FailedParses.new
  fp_obj.load(failed_parses_filename)

  # split and write to appropriate files
  fp_train_obj, fp_test_obj = fp_obj.make_split(@trainpercent)
-
- train_filename =
+
+ train_filename =
  File.new_filename(@exp.instantiate("rosy_dir",
  "exp_ID" => @exp.get("experiment_ID")),
  @exp.instantiate("failed_file",
  "exp_ID" => @exp.get("experiment_ID"),
  "split_ID" => @splitID,
  "dataset" => "train"))
-
+
  fp_train_obj.save(train_filename)
-
- test_filename =
+
+ test_filename =
  File.new_filename(@exp.instantiate("rosy_dir",
  "exp_ID" => @exp.get("experiment_ID")),
  @exp.instantiate("failed_file",
@@ -193,26 +193,26 @@ class RosySplit < RosyTask
  # this is the part of the perform() method
  # that makes the actual split
  # at random and stores it in new database tables
- def perform_make_split()
+ def perform_make_split
  $stderr.puts "Making split with ID #{@splitID}"

  # get a view of the main table
- maintable = @ttt_obj.existing_train_table()
+ maintable = @ttt_obj.existing_train_table

  # construct new DB tables for the train and test part of the new split:
- # get table name and join column name
- split_train_table = @ttt_obj.new_split_table(@splitID, "train", RosySplit.split_index_colname())
- split_test_table = @ttt_obj.new_split_table(@splitID, "test", RosySplit.split_index_colname())
-
+ # get table name and join column name
+ split_train_table = @ttt_obj.new_split_table(@splitID, "train", RosySplit.split_index_colname)
+ split_test_table = @ttt_obj.new_split_table(@splitID, "test", RosySplit.split_index_colname)
+
  # make split: put each sentence ID into either the train or the test table
  # based on whether a random number btw. 0 and 100 is larger than @trainpercent or not
-
-
+
+
  # go through training data one frame at a time
  iterator = RosyIterator.new(@ttt_obj, @exp, "train", "xwise"=>"frame")
  iterator.each_group { |dummy1, dummy2|
  view = iterator.get_a_view_for_current_group(["sentid", maintable.index_name])
- view.each_sentence() { |sentence|
+ view.each_sentence { |sentence|
  if rand(100) > @trainpercent
  # put this sentence into the test table
  table = split_test_table
@@ -221,12 +221,14 @@ class RosySplit < RosyTask
  table = split_train_table
  end
  sentence.each { |instance|
- table.insert_row([[RosySplit.split_index_colname(), instance[maintable.index_name]],
+ table.insert_row([[RosySplit.split_index_colname, instance[maintable.index_name]],
  ["sentid", instance["sentid"]]])
  }
- }
- view.close()
+ }
+ view.close
  }
  end

  end
+ end
+ end
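
The hunks above also show how perform_make_split distributes the data: each sentence of the main training table goes to the test split whenever rand(100) exceeds @trainpercent, otherwise to the train split, and all instances of a sentence stay together. A minimal standalone sketch of that sentence-level split follows; the sentence list and the two plain arrays standing in for the split DB tables are hypothetical, not the gem's schema.

  # Sketch of the sentence-level random split from RosySplit#perform_make_split.
  # 'sentences' and the two arrays below are illustrative stand-ins for the
  # database view and the split_train/split_test tables.
  trainpercent = 90
  train_split = []
  test_split = []

  sentences = [
    { "sentid" => "s1" },
    { "sentid" => "s2" },
    { "sentid" => "s3" }
  ]

  sentences.each do |sentence|
    # rand(100) yields 0..99; values above trainpercent land in the test split
    if rand(100) > trainpercent
      test_split << sentence["sentid"]
    else
      train_split << sentence["sentid"]
    end
  end

  puts "train: #{train_split.inspect}"
  puts "test:  #{test_split.inspect}"
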
data/lib/rosy/RosyTask.rb CHANGED
@@ -3,17 +3,21 @@
  # KE, SP April 05
  #
  # this is the abstract class that describes the interface for
- # the task classes of Rosy.
+ # the task classes of Rosy.
  #
  # all task classes should have a perform() method that actually
  # performs the task.

+ module Shalmaneser
+ module Rosy
  class RosyTask
- def initialize()
+ def initialize
  raise "Shouldn't be here! I'm an abstract class"
  end

- def perform()
+ def perform
  raise "Should be overwritten by the inheriting class!"
  end
  end
+ end
+ end
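
RosyTask stays a bare abstract base after the move into the Shalmaneser::Rosy namespace: both initialize and perform raise, so concrete tasks (RosySplit, RosyTrain, RosyTest, and so on) must override them. Below is a self-contained sketch of that contract; ExampleTask is a made-up subclass used only to illustrate the expected interface.

  # Abstract base exactly as in the diff, plus a hypothetical subclass.
  module Shalmaneser
    module Rosy
      class RosyTask
        def initialize
          raise "Shouldn't be here! I'm an abstract class"
        end

        def perform
          raise "Should be overwritten by the inheriting class!"
        end
      end

      # ExampleTask is not part of the gem; it only shows the expected interface.
      class ExampleTask < RosyTask
        def initialize(name)
          @name = name
        end

        def perform
          puts "performing #{@name}"
        end
      end
    end
  end

  Shalmaneser::Rosy::ExampleTask.new("demo").perform
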
data/lib/rosy/RosyTest.rb CHANGED
@@ -8,24 +8,23 @@
  require "tempfile"
  require 'fileutils'

- # Salsa packages
- require "common/Parser"
- require "common/SalsaTigerRegXML"
- require "common/SynInterfaces"
- require "common/ruby_class_extensions"
+ # require "SalsaTigerRegXML"
+ require 'salsa_tiger_xml/salsa_tiger_sentence'
+ require 'salsa_tiger_xml/file_parts_parser'
+ require 'external_systems'
+ require "ruby_class_extensions"

  # Rosy packages
  require "rosy/FeatureInfo"
- require "common/ML"
- require "common/RosyConventions"
+ require 'ml/classifier'
+ require 'rosy/rosy_conventions'
  require "rosy/RosyIterator"
  require "rosy/RosyTask"
  require "rosy/RosyTrainingTestTable"
- require "rosy/View"
-
- # Frprep packages
- #require "common/prep_config_data" # AB: what the fuck???
+ # require "rosy/View"

+ module Shalmaneser
+ module Rosy
  ##########################################################################
  # classifier combination class
  class ClassifierCombination
@@ -38,19 +37,19 @@ class ClassifierCombination
  # combine:
  #
  # given a list of classifier results --
- # where a classifier result is a list of strings,
+ # where a classifier result is a list of strings,
  # one string (= assigned class) for each instance,
  # and where each list of classifier results has the same length --
  # for each instance, combine individual classifier results
  # into a single judgement
  #
- # returns: an array of strings: one combined classifier result,
+ # returns: an array of strings: one combined classifier result,
  # one string (=assigned class) for each instance
  def combine(classifier_results) #array:array:string, list of classifier results

- if classifier_results.length() == 1
+ if classifier_results.length == 1
  return classifier_results.first
- elsif classifier_results.length() == 0
+ elsif classifier_results.length == 0
  raise "Can't do classification with zero classifiers."
  else
  raise "True classifier combination not implemented yet"
@@ -66,16 +65,16 @@ class RosyTest < RosyTask

  #####
  # new:
- #
+ #
  # initialize everything for applying classifiers
  #
  # argrec_apply: apply trained argrec classifiers to
  # training data, which means that almost everything is different
  def initialize(exp, # RosyConfigData object: experiment description
- opts, # hash: runtime argument option (string) -> value (string)
- ttt_obj, # RosyTrainingTestTable object
+ opts, # hash: runtime argument option (string) -> value (string)
+ ttt_obj, # RosyTrainingTestTable object
  argrec_apply = false) # boolean. true: see above
-
+
  ##
  # remember the experiment description

@@ -89,16 +88,16 @@ class RosyTest < RosyTask
  # defaults:
  @step = "both"
  @splitID = nil
- @testID = default_test_ID()
+ @testID = ::Shalmaneser::Rosy.default_test_ID
  @produce_output = true

  opts.each { |opt,arg|
  case opt
  when "--step"
- unless ["argrec", "arglab", "both", "onestep"].include? arg
- raise "Classification step must be one of: argrec, arglab, both, onestep. I got: " + arg.to_s
- end
- @step = arg
+ unless ["argrec", "arglab", "both", "onestep"].include? arg
+ raise "Classification step must be one of: argrec, arglab, both, onestep. I got: " + arg.to_s
+ end
+ @step = arg

  when "--logID"
  @splitID = arg
@@ -110,20 +109,20 @@ class RosyTest < RosyTask
  @produce_output = false

  else
- # this is an option that is okay but has already been read and used by rosy.rb
- end
+ # this is an option that is okay but has already been read and used by rosy.rb
+ end
  }

  ##
  # check: if this is about a split, do we have it?
  # if it is about a test, do we have it?
  if @splitID
- unless @ttt_obj.splitIDs().include?(@splitID)
+ unless @ttt_obj.splitIDs.include?(@splitID)
  $stderr.puts "Sorry, I have no data for split ID #{@splitID}."
  exit 1
  end
  else
- if not(@argrec_apply) and not(@ttt_obj.testIDs().include?(@testID))
+ if not(@argrec_apply) and not(@ttt_obj.testIDs.include?(@testID))
  $stderr.puts "Sorry, I have no data for test ID #{@testID}."
  exit 1
  end
@@ -142,18 +141,13 @@ class RosyTest < RosyTask
  if @classifiers.empty?
  raise "I need at least one classifier, please specify using exp. file option 'classifier'"
  end
-
+
  # make classifier combination object
  @combinator = ClassifierCombination.new(@exp)

  if not(@argrec_apply)
  # normal run

- #####
- # Enduser mode: only steps "both" and "onestep" available.
- # testing only on test data, not on split data
- in_enduser_mode_ensure(["both", "onestep"].include?(@step))
-
  ##
  # add preprocessing information to the experiment file object
  # @note AB: Commented out due to separation of PrepConfigData:
@@ -176,7 +170,7 @@ class RosyTest < RosyTask
  # $stderr.puts "Parameter #{preproc_param} has to be a readable file."
  # exit 1
  # end
- # preproc_exp = FrPrepConfigData.new(preproc_expname)
+ # preproc_exp = FrappeConfigData.new(preproc_expname)
  # @exp.adjoin(preproc_exp)

  # announce the task
@@ -196,38 +190,25 @@ class RosyTest < RosyTask
  # perform
  #
  # apply trained classifiers to the given (test) data
- def perform()
+ def perform
  if @step == "both"
  # both? then do first argrec, then arglab
  $stderr.puts "Rosy testing step argrec"
-
+
  previous_produce_output = @produce_output # no output in argrec
  @produce_output = false # when performing both steps in a row

  @step = "argrec"
- perform_aux()
+ perform_aux

  $stderr.puts "Rosy testing step arglab"
  @produce_output = previous_produce_output
  @step = "arglab"
- perform_aux()
+ perform_aux
  else
  # not both? then just do one
  $stderr.puts "Rosy testing step " + @step
- perform_aux()
- end
-
- ####
- # Enduser mode: remove DB table with test data
- if $ENDUSER_MODE
- $stderr.puts "---"
- $stderr.puts "Cleanup: Removing DB table with test data."
-
- unless @testID
- raise "Shouldn't be here"
- end
-
- @ttt_obj.remove_test_table(@testID)
+ perform_aux
  end
  end

@@ -237,7 +218,7 @@ class RosyTest < RosyTask
  # returns the column name for the current run,
  # i.e. the name of the column where this object's perform method
  # writes its data
- def get_result_column_name()
+ def get_result_column_name
  return @run_column
  end

@@ -247,91 +228,91 @@ class RosyTest < RosyTask
  # perform_aux: do the actual work of the perform() method
  # moved here because of the possibility of having @step=="both",
  # which makes it necessary to perform two test steps one after the other
- def perform_aux()
+ def perform_aux

  @iterator, @run_column = get_iterator(true)

  ####
  # get the list of relevant features,
- # remove the features that describe the unit by which we train,
+ # remove the features that describe the unit by which we train,
  # since they are going to be constant throughout the training file
-
- @features = @ttt_obj.feature_info.get_model_features(@step) -
- @iterator.get_xwise_column_names()
+
+ @features = @ttt_obj.feature_info.get_model_features(@step) -
+ @iterator.get_xwise_column_names

  # but add the gold feature
  unless @features.include? "gold"
  @features << "gold"
  end
-
+
  ####
  # for each group (as defined by the @iterator):
  # apply the group-specific classifier,
- # write the result into the database, into
+ # write the result into the database, into
  # the column named @run_column
- classif_dir = classifier_directory_name(@exp, @step, @splitID)
+ classif_dir = ::Shalmaneser::Rosy::classifier_directory_name(@exp, @step, @splitID)

  @iterator.each_group { |group_descr_hash, group|

  $stderr.puts "Applying classifiers to: " + group.to_s
-
+
  # get data for current group from database:
-
+
  # make a view: model features
  feature_view = @iterator.get_a_view_for_current_group(@features)
-
- if feature_view.length() == 0
+
+ if feature_view.length == 0
  # no test data in this view: next group
- feature_view.close()
+ feature_view.close
  next
  end
-
+
  # another view for writing the result
  result_view = @iterator.get_a_view_for_current_group([@run_column])

  # read trained classifiers
  # classifiers_read_okay: boolean, true if reading the stored classifier(s) succeeded
  classifiers_read_okay = true
-
- @classifiers.each { |classifier, classifier_name|
-
- stored_classifier = classif_dir +
+
+ @classifiers.each { |classifier, classifier_name|
+
+ stored_classifier = classif_dir +
  @exp.instantiate("classifier_file",
  "classif" => classifier_name,
  "group" => group.gsub(/ /, "_"))
-
+
  status = classifier.read(stored_classifier)
  unless status
  STDERR.puts "[RosyTest] Error: could not read classifier."
  classifiers_read_okay = false
  end
-
+
  }

- classification_result = Array.new
-
- if classifiers_read_okay
+ classification_result = []
+
+ if classifiers_read_okay
  # apply classifiers, write result to database
  classification_result = apply_classifiers(feature_view, group, "test")
  end
-
- if classification_result == Array.new
- # either classifiers did not read OK, or some problem during classification:
+
+ if classification_result == []
+ # either classifiers did not read OK, or some problem during classification:
  # label everything with NONE
  result_view.each_instance_s {|inst|
  classification_result << @exp.get("noval")
- }
+ }
  end

- result_view.update_column(@run_column,
+ result_view.update_column(@run_column,
  classification_result)
- feature_view.close()
- result_view.close()
+ feature_view.close
+ result_view.close
  }

  # pruning? then set the result for pruned nodes to "noval"
  # if we are doing argrec or onestep
- integrate_pruning_into_argrec_result()
+ integrate_pruning_into_argrec_result

  # postprocessing:
  # remove superfluous role labels, i.e. labels on nodes
@@ -346,18 +327,18 @@ class RosyTest < RosyTask

  @postprocessing_iterator.each_group { |group_descr_hash, group|

- view = @postprocessing_iterator.get_a_view_for_current_group(["nodeID", "sentid", @run_column])
-
- # remove superfluous labels, write the result back to the DB
- postprocess_classification(view, @run_column)
- view.close()
+ view = @postprocessing_iterator.get_a_view_for_current_group(["nodeID", "sentid", @run_column])
+
+ # remove superfluous labels, write the result back to the DB
+ postprocess_classification(view, @run_column)
+ view.close
  }
  end


  # all went well, so confirm this run
  if @argrec_apply
- # argrec_apply: don't add preprocessing info again, and
+ # argrec_apply: don't add preprocessing info again, and
  # get view maker for the training data
  @ttt_obj.confirm_runlog("argrec", "train", @testID, @splitID, @run_column)
  else
@@ -369,7 +350,7 @@ class RosyTest < RosyTask
  # If we are being asked to produce SalsaTigerXML output:
  # produce it.
  if @produce_output
- write_stxml_output()
+ write_stxml_output
  end
  end

@@ -386,8 +367,8 @@ class RosyTest < RosyTask
  #
  if @argrec_apply
  # get view maker for the training data
- iterator = RosyIterator.new(@ttt_obj, @exp, "train",
- "step" => @step,
+ iterator = RosyIterator.new(@ttt_obj, @exp, "train",
+ "step" => @step,
  "splitID" => @splitID,
  "prune" => prune)
  run_column = @ttt_obj.new_runlog("argrec", "train", @testID, @splitID)
@@ -397,9 +378,9 @@ class RosyTest < RosyTask

  # hand all the info to the RosyIterator object
  # It will figure out what view I'll need
- iterator = RosyIterator.new(@ttt_obj, @exp, "test",
- "step" => @step,
- "testID" => @testID,
+ iterator = RosyIterator.new(@ttt_obj, @exp, "test",
+ "step" => @step,
+ "testID" => @testID,
  "splitID" => @splitID,
  "prune" => prune)

@@ -411,7 +392,7 @@ class RosyTest < RosyTask

  #########################
  # integrate pruning result into argrec result
- def integrate_pruning_into_argrec_result()
+ def integrate_pruning_into_argrec_result
  if ["argrec", "onestep"].include? @step
  # we only need to integrate pruning results into argument recognition

@@ -425,39 +406,39 @@ class RosyTest < RosyTask
  def apply_classifiers(view, # DBView object: data to be classified
  group, # string: frame or target POS we are classifying
  dataset) # string: train/test
-
+
  # make input file for classifiers
  tf_input = Tempfile.new("rosy")
  view.each_instance_s { |instance_string|
  # change punctuation to _PUNCT_
  # and change empty space to _
  # because otherwise some classifiers may spit
- tf_input.puts prepare_output_for_classifiers(instance_string)
+ tf_input.puts ::Shalmaneser::Rosy::prepare_output_for_classifiers(instance_string)
  }
- tf_input.close()
+ tf_input.close
  # make output file for classifiers
  tf_output = Tempfile.new("rosy")
- tf_output.close()
-
+ tf_output.close
+
  ###
  # apply classifiers
-
+
  # classifier_results: array:array of strings, a list of classifier results,
  # each result a list of assigned classes(string), one class for each instance of the view
- classifier_results = Array.new
+ classifier_results = []

  @classifiers.each { |classifier, classifier_name|


- # did we manage to classify the test data?
- # there may be errors on the way (eg no training data)
-
- success = classifier.apply(tf_input.path(), tf_output.path())
-
+ # did we manage to classify the test data?
+ # there may be errors on the way (eg no training data)
+
+ success = classifier.apply(tf_input.path, tf_output.path)
+
  if success
-
+
  # read classifier output from file
- classifier_results << classifier.read_resultfile(tf_output.path()).map { |instance_result|
+ classifier_results << classifier.read_resultfile(tf_output.path).map { |instance_result|
  # instance_result is a list of pairs [label, confidence]
  # such that the label with the highest confidence is first
  if instance_result.empty?
@@ -465,18 +446,18 @@ class RosyTest < RosyTask
  nil
  else
  # label of the first label/confidence pair
- instance_result.first().first()
+ instance_result.first.first
  end
- }.compact()
-
+ }.compact
+
  else
  # error: return empty Array, so that error handling can take over in perform_aux()
- return Array.new
+ return []
  end
  }

- # if we are here, all classifiers have succeeded...
-
+ # if we are here, all classifiers have succeeded...
+
  # clean up
  tf_input.close(true)
  tf_output.close(true)
@@ -497,7 +478,7 @@ class RosyTest < RosyTask
  # \
  # FE
  #
- # to
+ # to
  # FE
  # / \
  # ...
@@ -509,18 +490,18 @@ class RosyTest < RosyTask

  # keep new values for run_column for all rows in view
  # will be used for update in the end
- result = Array.new()
+ result = []

- view.each_sentence() { |sentence|
+ view.each_sentence { |sentence|

- # returns hash:
+ # returns hash:
  # node index -> array of node indices: ancestors of the given node
  # indices are indices in the 'sentence' array
  ancestors = make_ancestor_hash(sentence)

  # test output
  # $stderr.puts "nodeID values:"
- # sentence.each_with_index { |inst, index|
+ # sentence.each_with_index { |inst, index|
  # $stderr.puts "#{index}) #{inst["nodeID"]}"
  # }
  # $stderr.puts "\nAncestor hash:"
@@ -532,27 +513,27 @@ class RosyTest < RosyTask

  sentence.each_with_index { |instance, inst_index|

- # check whether this instance has an equally labeled ancestor
- has_equally_labeled_ancestor = false
-
- if (instance[run_column] != @exp.get("noval")) and
- ancestors[inst_index]
-
- if ancestors[inst_index].detect { |anc_index|
- sentence[anc_index][run_column] == instance[run_column]
- }
- has_equally_labeled_ancestor = true
- else
- has_equally_labeled_ancestor = false
- end
- end
-
-
- if has_equally_labeled_ancestor
- result << @exp.get("noval")
- else
- result << instance[run_column]
- end
+ # check whether this instance has an equally labeled ancestor
+ has_equally_labeled_ancestor = false
+
+ if (instance[run_column] != @exp.get("noval")) and
+ ancestors[inst_index]
+
+ if ancestors[inst_index].detect { |anc_index|
+ sentence[anc_index][run_column] == instance[run_column]
+ }
+ has_equally_labeled_ancestor = true
+ else
+ has_equally_labeled_ancestor = false
+ end
+ end
+
+
+ if has_equally_labeled_ancestor
+ result << @exp.get("noval")
+ else
+ result << instance[run_column]
+ end
  }
  }

@@ -560,16 +541,16 @@ class RosyTest < RosyTask
  # # checking: how many labels have we deleted?
  # before = 0
  # view.each_sentence { |s|
- # s.each { |inst|
- # unless inst[run_column] == @exp.get("noval")
- # before += 1
- # end
+ # s.each { |inst|
+ # unless inst[run_column] == @exp.get("noval")
+ # before += 1
+ # end
  # }
  # }
  # after = 0
- # result.each { |r|
+ # result.each { |r|
  # unless r == @exp.get("noval")
- # after += 1
+ # after += 1
  # end
  # }
  # $stderr.puts "Non-NONE labels before: #{before}"
@@ -593,15 +574,15 @@ class RosyTest < RosyTask
  def make_ancestor_hash(sentence) # array:hash: column_name(string) -> column_value(object)
  # for each instance: find the parent
  # and store it in the parent_index hash
- parent_index = Hash.new
+ parent_index = {}


- # first make hash mapping each node ID to its index in the
+ # first make hash mapping each node ID to its index in the
  # 'sentence' array
- id_to_index = Hash.new()
+ id_to_index = {}
  sentence.each_with_index { |instance, index|
  if instance["nodeID"]
- myID, parentID = instance["nodeID"].split()
+ myID, parentID = instance["nodeID"].split
  id_to_index[myID] = index
  else
  $stderr.puts "WARNING: no node ID for instance:\n"
@@ -612,7 +593,7 @@ class RosyTest < RosyTask
  # now make hash mapping each node index to its parent index
  sentence.each { |instance|
  if instance["nodeID"]
- myID, parentID = instance["nodeID"].split()
+ myID, parentID = instance["nodeID"].split
  if parentID # root has no parent ID

  # sanity check: do I know the indices?
@@ -630,14 +611,14 @@ class RosyTest < RosyTask

  # for each instance: gather ancestor IDs
  # and store them in the ancestor_index hash
- ancestor_index = Hash.new
+ ancestor_index = {}

  parent_index.each_key { |node_index|
- ancestor_index[node_index] = Array.new
+ ancestor_index[node_index] = []
  ancestor = parent_index[node_index]

  while ancestor
- if ancestor_index[node_index].include? ancestor
+ if ancestor_index[node_index].include? ancestor
  # we seem to have run into a loop
  # this should not happen, but it has happened anyway ;-)
  # STDERR.puts "Warning: node #{ancestor} is its own ancestor!"
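
The postprocessing shown in the hunks above drops a role label whenever the node already has an ancestor carrying the same label: make_ancestor_hash turns the "myID parentID" strings into a per-node ancestor list (with loop protection), and postprocess_classification resets such nodes to the configured "noval" class. A condensed sketch of that check on a toy sentence follows; the plain hashes replace the DB view rows, and "NONE" stands in for the noval value.

  # Condensed illustration of the "equally labeled ancestor" postprocessing.
  # Each toy instance carries "nodeID" = "myID parentID" and an assigned label.
  sentence = [
    { "nodeID" => "n1",    "label" => "FE_A" },   # root
    { "nodeID" => "n2 n1", "label" => "FE_A" },   # same label as ancestor n1
    { "nodeID" => "n3 n1", "label" => "FE_B" }
  ]

  noval = "NONE"

  # map node ID -> index, then index -> parent index
  id_to_index = {}
  sentence.each_with_index { |inst, i| id_to_index[inst["nodeID"].split.first] = i }
  parent_index = {}
  sentence.each_with_index do |inst, i|
    _my_id, parent_id = inst["nodeID"].split
    parent_index[i] = id_to_index[parent_id] if parent_id
  end

  # collect ancestors per node, guarding against cycles
  ancestors = Hash.new { |h, k| h[k] = [] }
  parent_index.each_key do |i|
    anc = parent_index[i]
    while anc && !ancestors[i].include?(anc)
      ancestors[i] << anc
      anc = parent_index[anc]
    end
  end

  # drop labels that repeat on an ancestor
  result = sentence.each_with_index.map do |inst, i|
    repeated = inst["label"] != noval &&
               ancestors[i].any? { |a| sentence[a]["label"] == inst["label"] }
    repeated ? noval : inst["label"]
  end

  p result  # => ["FE_A", "NONE", "FE_B"]
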
@@ -655,12 +636,12 @@ class RosyTest < RosyTask
  #
  # Output the result of Rosy as SalsaTigerXML:
  # Take the input SalsaTigerXML data,
- # and write them to directory_output
+ # and write them to directory_output
  # (or, lacking that, to <rosy_dir>/<experiment_ID>/output),
  # taking over the frames from the input data
  # and supplanting any FEs that might be set in the input data
  # by the ones newly assigned by Rosy.
- def write_stxml_output()
+ def write_stxml_output

  ##
  # determine input and output directory
@@ -674,7 +655,7 @@ class RosyTest < RosyTask
  input_directory = File.existing_dir(rosy_dir, "input_dir/test")
  end

-
+
  if @exp.get("directory_output")
  # user has set an explicit output directory
  output_directory = File.new_dir(@exp.get("directory_output"))
@@ -682,11 +663,11 @@ class RosyTest < RosyTask
  # no output directory has been set: use default
  output_directory = File.new_dir(@exp.instantiate("rosy_dir", "exp_ID" => @exp.get("experiment_ID")),
  "output")
- end
-
+ end
+
  ###
  # find appropriate class for interpreting syntactic structures
- interpreter_class = SynInterfaces.get_interpreter_according_to_exp(@exp)
+ interpreter_class = ::Shalmaneser::ExternalSystems.get_interpreter_according_to_exp(@exp)


  $stderr.puts "Writing SalsaTigerXML output to #{output_directory}"
@@ -694,16 +675,16 @@ class RosyTest < RosyTask
  ###
  # read in all FEs that have been assigned
  # sentid_to_assigned: hash <sent ID, frame ID> (string) -> array of pairs [FE, node ID]
- sentid_to_assigned = Hash.new
+ sentid_to_assigned = {}
  @iterator.each_group { |group_descr_hash, group|
  view = @iterator.get_a_view_for_current_group([@run_column, "nodeID", "sentid"])

  view.each_hash { |inst_hash|
  # if this sentence ID/frame ID pair is in the test data,
- # its hash entry will at least be nonnil, even if no
+ # its hash entry will at least be nonnil, even if no
  # FEs have been assigned for it
  unless sentid_to_assigned[inst_hash["sentid"]]
- sentid_to_assigned[inst_hash["sentid"]] = Array.new
+ sentid_to_assigned[inst_hash["sentid"]] = []
  end

  # if nothing has been assigned to this instance, don't record it
@@ -714,7 +695,7 @@ class RosyTest < RosyTask
  # record instance
  sentid_to_assigned[inst_hash["sentid"]] << [inst_hash[@run_column], inst_hash["nodeID"]]
  }
- view.close()
+ view.close
  }

  ###
@@ -726,11 +707,11 @@ class RosyTest < RosyTask

  # unpack input file
  tempfile = Tempfile.new("RosyTest")
- tempfile.close()
- %x{gunzip -c #{infilename} > #{tempfile.path()}}
+ tempfile.close
+ %x{gunzip -c #{infilename} > #{tempfile.path}}

  # open input and output file
- infile = FilePartsParser.new(tempfile.path())
+ infile = STXML::FilePartsParser.new(tempfile.path)
  outfilename = output_directory + File.basename(infilename, ".gz")
  begin
  outfile = File.new(outfilename, "w")
@@ -739,35 +720,35 @@ class RosyTest < RosyTask
  end

  # write header to output file
- outfile.puts infile.head()
-
+ outfile.puts infile.head
+
  ##
  # each input sentence: integrate newly assigned roles
  infile.scan_s { |sent_string|
- sent = SalsaTigerSentence.new(sent_string)
-
+ sent = STXML::SalsaTigerSentence.new(sent_string)
+
  ##
  # each input frame: remove old roles, add new ones
  sent.frames.each { |frame|

  # this corresponds to the sentid feature in the database
- sent_frame_id = construct_instance_id(sent.id(), frame.id())
+ sent_frame_id = ::Shalmaneser::Rosy::construct_instance_id(sent.id, frame.id)

  if sentid_to_assigned[sent_frame_id].nil? and @splitID
- # we are using a split of the training data, and
+ # we are using a split of the training data, and
  # this sentence/frame ID pair does not
  # seem to be in the test part of the split
  # so do not show the frame
- #
- # Note that if we are _not_ working on a split,
- # we are not discarding any frames or sentences
+ #
+ # Note that if we are _not_ working on a split,
+ # we are not discarding any frames or sentences
  sent.remove_frame(frame)
  end

  # remove old roles, but do not remove target
- old_fes = frame.children()
+ old_fes = frame.children
  old_fes.each { |old_fe|
- unless old_fe.name() == "target"
+ unless old_fe.name == "target"
  frame.remove_child(old_fe)
  end
  }
@@ -784,14 +765,14 @@ class RosyTest < RosyTask
  sentid_to_assigned[sent_frame_id].map { |fe_name, npp| fe_name }.uniq.each { |fe_name|
  # each FE

- nodes = sentid_to_assigned[sent_frame_id].select { |other_fe_name, npp|
+ nodes = sentid_to_assigned[sent_frame_id].select { |other_fe_name, npp|
  # collect node ID / parentnode ID pairs listed for that FE
  other_fe_name == fe_name

  }.map { |other_fe_name, nodeid_plus_parent_id|
  # map the node ID / parentnode ID pair to an actual node

- node_id, parent_id = nodeid_plus_parent_id.split()
+ node_id, parent_id = nodeid_plus_parent_id.split
  if node_id == @exp.get("noval")
  $stderr.puts "Warning: got NONE for a node ID"
  node = nil
@@ -805,7 +786,7 @@ class RosyTest < RosyTask

  node
  }.compact
-
+
  # assign the FE
  sent.add_fe(frame, fe_name, interpreter_class.max_constituents(nodes, sent))
  } # each FE
@@ -813,17 +794,19 @@ class RosyTest < RosyTask

  # write changed sentence to output file
  # if we are working on a split of the training data,
- # write the sentence only if there are frames in it
- if sent.frames.length() == 0 and @splitID
- # split of the training data, and no frames
- else
- outfile.puts sent.get()
+ # write the sentence only if there are frames in it
+ if sent.frames.length == 0 and @splitID
+ # split of the training data, and no frames
+ else
+ outfile.puts sent.get
  end
  } # each sentence

  # write footer to output file
- outfile.puts infile.tail()
+ outfile.puts infile.tail
  tempfile.close(true)
  } # each input file
  end
  end
+ end
+ end
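
For reference, the ClassifierCombination#combine logic visible in the first RosyTest.rb hunk passes a single classifier's labels through unchanged, rejects an empty classifier list, and leaves true combination unimplemented. A trimmed, runnable sketch of exactly that behaviour (the constructor argument of the real class is omitted here):

  # Trimmed sketch of ClassifierCombination#combine as shown in the diff:
  # one result list passes through, zero raises, real combination is still a TODO.
  class ClassifierCombination
    def combine(classifier_results)
      if classifier_results.length == 1
        classifier_results.first
      elsif classifier_results.length == 0
        raise "Can't do classification with zero classifiers."
      else
        raise "True classifier combination not implemented yet"
      end
    end
  end

  # one classifier, three instances: the per-instance labels are returned unchanged
  p ClassifierCombination.new.combine([["FE_A", "NONE", "FE_B"]])
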