RubyGems - shalmaneser - Versions diffs - 0.0.1.alpha - Mend

shalmaneser 0.0.1.alpha

Files changed (138) hide show

data/.yardopts +8 -0
data/CHANGELOG.rdoc +0 -0
data/LICENSE.rdoc +0 -0
data/README.rdoc +0 -0
data/lib/common/AbstractSynInterface.rb +1227 -0
data/lib/common/BerkeleyInterface.rb +375 -0
data/lib/common/CollinsInterface.rb +1165 -0
data/lib/common/ConfigData.rb +694 -0
data/lib/common/Counter.rb +18 -0
data/lib/common/DBInterface.rb +48 -0
data/lib/common/EnduserMode.rb +27 -0
data/lib/common/Eval.rb +480 -0
data/lib/common/FixSynSemMapping.rb +196 -0
data/lib/common/FrPrepConfigData.rb +66 -0
data/lib/common/FrprepHelper.rb +1324 -0
data/lib/common/Graph.rb +345 -0
data/lib/common/ISO-8859-1.rb +24 -0
data/lib/common/ML.rb +186 -0
data/lib/common/Maxent.rb +215 -0
data/lib/common/MiniparInterface.rb +1388 -0
data/lib/common/Optimise.rb +195 -0
data/lib/common/Parser.rb +213 -0
data/lib/common/RegXML.rb +269 -0
data/lib/common/RosyConventions.rb +171 -0
data/lib/common/SQLQuery.rb +243 -0
data/lib/common/STXmlTerminalOrder.rb +194 -0
data/lib/common/SalsaTigerRegXML.rb +2347 -0
data/lib/common/SalsaTigerXMLHelper.rb +99 -0
data/lib/common/SleepyInterface.rb +384 -0
data/lib/common/SynInterfaces.rb +275 -0
data/lib/common/TabFormat.rb +720 -0
data/lib/common/Tiger.rb +1448 -0
data/lib/common/TntInterface.rb +44 -0
data/lib/common/Tree.rb +61 -0
data/lib/common/TreetaggerInterface.rb +303 -0
data/lib/common/headz.rb +338 -0
data/lib/common/option_parser.rb +13 -0
data/lib/common/ruby_class_extensions.rb +310 -0
data/lib/fred/Baseline.rb +150 -0
data/lib/fred/FileZipped.rb +31 -0
data/lib/fred/FredBOWContext.rb +863 -0
data/lib/fred/FredConfigData.rb +182 -0
data/lib/fred/FredConventions.rb +232 -0
data/lib/fred/FredDetermineTargets.rb +324 -0
data/lib/fred/FredEval.rb +312 -0
data/lib/fred/FredFeatureExtractors.rb +321 -0
data/lib/fred/FredFeatures.rb +1061 -0
data/lib/fred/FredFeaturize.rb +596 -0
data/lib/fred/FredNumTrainingSenses.rb +27 -0
data/lib/fred/FredParameters.rb +402 -0
data/lib/fred/FredSplit.rb +84 -0
data/lib/fred/FredSplitPkg.rb +180 -0
data/lib/fred/FredTest.rb +607 -0
data/lib/fred/FredTrain.rb +144 -0
data/lib/fred/PlotAndREval.rb +480 -0
data/lib/fred/fred.rb +45 -0
data/lib/fred/md5.rb +23 -0
data/lib/fred/opt_parser.rb +250 -0
data/lib/frprep/AbstractSynInterface.rb +1227 -0
data/lib/frprep/Ampersand.rb +37 -0
data/lib/frprep/BerkeleyInterface.rb +375 -0
data/lib/frprep/CollinsInterface.rb +1165 -0
data/lib/frprep/ConfigData.rb +694 -0
data/lib/frprep/Counter.rb +18 -0
data/lib/frprep/FNCorpusXML.rb +643 -0
data/lib/frprep/FNDatabase.rb +144 -0
data/lib/frprep/FixSynSemMapping.rb +196 -0
data/lib/frprep/FrPrepConfigData.rb +66 -0
data/lib/frprep/FrameXML.rb +513 -0
data/lib/frprep/FrprepHelper.rb +1324 -0
data/lib/frprep/Graph.rb +345 -0
data/lib/frprep/ISO-8859-1.rb +24 -0
data/lib/frprep/MiniparInterface.rb +1388 -0
data/lib/frprep/Parser.rb +213 -0
data/lib/frprep/RegXML.rb +269 -0
data/lib/frprep/STXmlTerminalOrder.rb +194 -0
data/lib/frprep/SalsaTigerRegXML.rb +2347 -0
data/lib/frprep/SalsaTigerXMLHelper.rb +99 -0
data/lib/frprep/SleepyInterface.rb +384 -0
data/lib/frprep/SynInterfaces.rb +275 -0
data/lib/frprep/TabFormat.rb +720 -0
data/lib/frprep/Tiger.rb +1448 -0
data/lib/frprep/TntInterface.rb +44 -0
data/lib/frprep/Tree.rb +61 -0
data/lib/frprep/TreetaggerInterface.rb +303 -0
data/lib/frprep/do_parses.rb +142 -0
data/lib/frprep/frprep.rb +686 -0
data/lib/frprep/headz.rb +338 -0
data/lib/frprep/one_parsed_file.rb +28 -0
data/lib/frprep/opt_parser.rb +94 -0
data/lib/frprep/ruby_class_extensions.rb +310 -0
data/lib/rosy/AbstractFeatureAndExternal.rb +240 -0
data/lib/rosy/DBMySQL.rb +146 -0
data/lib/rosy/DBSQLite.rb +280 -0
data/lib/rosy/DBTable.rb +239 -0
data/lib/rosy/DBWrapper.rb +176 -0
data/lib/rosy/ExternalConfigData.rb +58 -0
data/lib/rosy/FailedParses.rb +130 -0
data/lib/rosy/FeatureInfo.rb +242 -0
data/lib/rosy/GfInduce.rb +1115 -0
data/lib/rosy/GfInduceFeature.rb +148 -0
data/lib/rosy/InputData.rb +294 -0
data/lib/rosy/RosyConfigData.rb +115 -0
data/lib/rosy/RosyConfusability.rb +338 -0
data/lib/rosy/RosyEval.rb +465 -0
data/lib/rosy/RosyFeatureExtractors.rb +1609 -0
data/lib/rosy/RosyFeaturize.rb +280 -0
data/lib/rosy/RosyInspect.rb +336 -0
data/lib/rosy/RosyIterator.rb +477 -0
data/lib/rosy/RosyPhase2FeatureExtractors.rb +230 -0
data/lib/rosy/RosyPruning.rb +165 -0
data/lib/rosy/RosyServices.rb +744 -0
data/lib/rosy/RosySplit.rb +232 -0
data/lib/rosy/RosyTask.rb +19 -0
data/lib/rosy/RosyTest.rb +826 -0
data/lib/rosy/RosyTrain.rb +232 -0
data/lib/rosy/RosyTrainingTestTable.rb +786 -0
data/lib/rosy/TargetsMostFrequentFrame.rb +60 -0
data/lib/rosy/View.rb +418 -0
data/lib/rosy/opt_parser.rb +379 -0
data/lib/rosy/rosy.rb +77 -0
data/lib/shalmaneser/version.rb +3 -0
data/test/frprep/test_opt_parser.rb +94 -0
data/test/functional/functional_test_helper.rb +40 -0
data/test/functional/sample_experiment_files/fred_test.salsa.erb +122 -0
data/test/functional/sample_experiment_files/fred_train.salsa.erb +135 -0
data/test/functional/sample_experiment_files/prp_test.salsa.erb +138 -0
data/test/functional/sample_experiment_files/prp_test.salsa.fred.standalone.erb +120 -0
data/test/functional/sample_experiment_files/prp_test.salsa.rosy.standalone.erb +120 -0
data/test/functional/sample_experiment_files/prp_train.salsa.erb +138 -0
data/test/functional/sample_experiment_files/prp_train.salsa.fred.standalone.erb +138 -0
data/test/functional/sample_experiment_files/prp_train.salsa.rosy.standalone.erb +138 -0
data/test/functional/sample_experiment_files/rosy_test.salsa.erb +257 -0
data/test/functional/sample_experiment_files/rosy_train.salsa.erb +259 -0
data/test/functional/test_fred.rb +47 -0
data/test/functional/test_frprep.rb +52 -0
data/test/functional/test_rosy.rb +20 -0
metadata +284 -0

data/lib/rosy/RosyConfusability.rb ADDED Viewed

@@ -0,0 +1,338 @@
+# RosyConfusability
+# KE May 05
+#
+# Access instance database created by the Rosy role assignment system
+# and compute the confusability of target categories
+# for the data in the (training) database there.
+#
+# We define confusability as follows:
+# Given a frame fr, let
+# - fes(fr) the FEs of fr (a set)
+# - gfs(fe) the grammatical functions realizing the FE fe in the data
+# - gfs(fr) = U_{fe \in fes(fr)} gfs(fe) the grammatical functions realizing roles of fr
+#
+# Then the entropy of a grammatical function gf within fr is
+#
+# gfe_{fr}(gf) = \sum_{fe \in fes(fr)} -p(fe|gf) log p(fe|gf)
+#
+# where p(fe|gf) = f(gf, fe) / f(gf)
+#
+# And the confusability of a frame element fe of fr is
+#
+# c_{fr}(fe) = \sum_{gf \in gfs(fr)} p(gf|fe) gfe_{fr}(gf)
+#
+# where p(gf|fe) = f(gf, fe) / f(fe)
+require "RosyConfigData"
+require "RosyIterator"
+require "RosyConventions"
+require "TargetsMostFrequentFrame"
+require "mysql"
+class RosyConfusability
+  include TargetsMostFrequentSc
+  attr_reader :confusability, :counts_fe_glob, :frame_confusability, :overall_confusability
+  def initialize(exp) # RosyConfigData object
+    @exp = exp
+    @confusability = Hash.new(0.0)
+    @counts_fe_glob = Hash.new(0)
+    @counts_gffe_glob = Hash.new(0)
+    @frame_confusability = Hash.new(0.0)
+    @overall_confusability = 0.0
+    @frequent_gframes = [
+      # NO DUPLICATES
+      "Ext_Comp", "Mod", "Comp", "Gen",
+      "Ext_Obj", "Ext", "Ext_Obj_Comp", "Head",
+      "Ext_Mod", "Gen_Mod", "Mod_Comp", "Comp_Ext",
+      "Gen_Comp", "Ext_Gen", "Ext_Mod_Comp", "Head_Comp",
+      "Obj_Comp", "Obj", "Mod_Head", "Ext_Comp_Obj",
+      "Gen_Head", "Ext_Gen_Mod"
+      # with duplicates
+#       "Ext_Comp", "Mod", "Comp", "Gen",
+#       "Ext_Obj", "Ext", "", "Ext_Obj_Comp",
+#       "Ext_Comp_Comp", "Head", "Mod_Mod", "Gen_Mod",
+#       "Ext_Mod", "Comp_Comp", "Mod_Comp", "Ext_Gen",
+#       "Gen_Comp", "Head_Head", "Ext_Comp_Comp_Comp", "Head_Comp",
+# # "Ext_Ext_Comp",
+# #       "Ext_Obj_Comp_Comp", "Obj_Comp",
+# #       "Ext_Mod_Mod", "Comp_Comp_Comp",
+# #       "Ext_Ext_Obj", "Ext_Mod_Comp", "Comp_Ext", "Obj",
+# #       "Ext_Ext", "Ext_Obj_Obj", "Mod_Mod_Mod", "Gen_Mod_Mod",
+# #       "Ext_Comp_Comp_Comp_Comp", "Gen_Head", "Mod_Head",
+# #       "Ext_Ext_Ext_Comp"
+    ].map { |string|
+      string.split("_")
+    }
+  end
+  def compute(splitID,     # string: split ID, may be nil
+              additionals) # array:string: "target", "target_pos", "gframe", "fgframe"
+    ###
+    # open and initialize stuff:
+    # open database
+    database = Mysql.real_connect(@exp.get('host'), @exp.get('user'),
+                                  @exp.get('passwd'), @exp.get('dbname'))
+    # make an object that creates views.
+    # read one frame at a time.
+    iterator = RosyIterator.new(database, @exp, "train",
+                                "splitID" => splitID,
+                                "xwise" => "frame")
+    # get value for "no val"
+    noval = @exp.get("noval")
+    counts_frame = Hash.new(0)
+    # iterate through all frames and compute confusability of each FE
+    iterator.each_group { |group_descr_hash, frame|
+      $stderr.puts "Computing confusability for #{frame}"
+      # read all instances of the frame, columns: FE and GF
+      view = iterator.get_a_view_for_current_group(["sentid","gold", "fn_gf",
+                                                    "target","target_pos", "frame"])
+      if additionals.include? "tmfframe"
+        # find most frequent gframe for each target
+        tmfframe = determine_target_most_frequent_sc(view, noval)
+      end
+      # count occurrences
+      counts_gf = Hash.new(0)
+      counts_fe = Hash.new(0)
+      counts_gffe = Hash.new(0)
+      view.each_sentence { |sentence|
+        # make string consisting of all FN GFs of this sentence
+        allgfs = Array.new()
+        sentence.each { |inst|
+          if inst["fn_gf"] != noval
+            allgfs << inst["fn_gf"]
+          end
+        }
+        # assume uniqueness of GFs
+        # design decision, could also be done differently.
+        # rationale: if a GF occurs more than once,
+        # it's probable that this is because we get more than
+        # one constituent for this GF, not because
+        # it actually occurred more than once in the
+        # original FrameNet annotation.
+        allgfs.uniq!
+        # now count each instance
+        sentence.each { |row|
+          if row["gold"] == "target"
+            # don't count target among the FEs
+            next
+          end
+          if row["gold"] != noval
+            counts_fe[row["gold"]] += 1
+          end
+          if row["fn_gf"] != noval and row["fn_gf"] != "target"
+            gf = row["fn_gf"]
+            additionals.each { |additional|
+              case additional
+              when "target"
+                gf << "_" + row["target"]
+              when "target_pos"
+                gf << "_" + row["target_pos"]
+              when "gframe"
+                gf << "_" + allgfs.join("_")
+              when "fgframe"
+                # find the maximal frequent frame subsuming allgfs
+                maxfgf = nil
+                @frequent_gframes.each { |fgframe|
+                  if fgframe.subsumed_by?(allgfs)
+                    # fgframe is a subset of allgfs
+                    if maxfgf.nil? or fgframe.length() > maxfgf.length()
+                      maxfgf = fgframe
+                    end
+                  end
+                }
+                if maxfgf.nil?
+                  # nothing there that fits
+                  # leave GF as is
+                else
+                  gf << "_" + maxfgf.join("_")
+                end
+              when "tmfframe"
+                gf << "_" + tmfframe[tmf_target_key(row)]
+              else
+                raise "Don't know how to compute #{additional}"
+              end
+            }
+            counts_gf[gf] += 1
+          end
+          if row["gold"] != noval and gf
+            counts_gffe[gf + " " + row["gold"]] += 1
+          end
+        } # each row of sentence
+      } # each sentence of view
+      # compute gf entropy
+      # gfe_{fr}(gf) = \sum_{fe \in fes(fr)} -p(fe|gf) log_2 p(fe|gf)
+      #
+      # where p(fe|gf) = f(gf, fe) / f(gf)
+      gf_entropy = Hash.new
+      counts_gf.keys.each { |gf|
+        gf_entropy[gf] = 0.0
+        counts_fe.keys.each { |fe|
+          if counts_gf[gf] > 0
+            p_gf_fe = counts_gffe[gf + " " + fe].to_f / counts_gf[gf].to_f
+            # get log_2 via log_10
+            if p_gf_fe > 0.0
+              gf_entropy[gf] -= p_gf_fe * Math.log10(p_gf_fe) * 3.32193
+            end
+          end
+        } # each FE for this GF
+      } # each GF (gf entropy)
+      # compute FE confusability
+      # c_{fr}(fe) = \sum_{gf \in gfs(fr)} p(gf|fe) gfe_{fr}(gf)
+      #
+      # where p(gf|fe) = f(gf, fe) / f(fe)
+      counts_fe.keys.each { |fe|
+        @confusability[frame + " " + fe] = 0.0
+        counts_gf.keys.each { |gf|
+          if counts_fe[fe] > 0
+            p_fe_gf = counts_gffe[gf + " " + fe].to_f / counts_fe[fe].to_f
+            @confusability[frame + " " + fe] += p_fe_gf * gf_entropy[gf]
+          end
+        } # each GF for this FE
+      } # each FE (fe confusability)
+      # remember counts for FEs and GF/FE pairs
+      counts_fe.keys.each { |fe|
+        @counts_fe_glob[frame + " " + fe] = counts_fe[fe]
+      }
+      counts_gffe.each_pair {|event,freq|
+        @counts_gffe_glob[frame+" " +event] = freq
+      }
+      # omit rare FEs:
+      # anything below 5 occurrences
+      counts_fe.each_key {  |fe|
+        if counts_fe[fe] < 5
+          @confusability.delete(frame + " " + fe)
+        end
+      }
+      # compute overall frame confusability
+      # omitting rare FEs with below 5 occurrences:
+      #
+      # c(fr) = sum_{fe \in fes(fr)} f(fe)/f(fr) * c_{fr}(fe)
+      #       = \sum_{gf \in gfs(fr)} p(gf|fr) gfe_{fr}(gf)
+      #
+      # where p(gf|fr) = (sum_{fe\in fes(fr)} f(gf, fe)) / f(fr)
+      counts_frame[frame] = 0
+      counts_fe.each_value { |count|
+        if count >= 5
+          counts_frame[frame] += count
+        end
+      }
+      @frame_confusability[frame] = 0.0
+      counts_fe.each_pair { |fe, count|
+        if count >= 5
+          @frame_confusability[frame] += (count.to_f / counts_frame[frame].to_f) * @confusability[frame + " " + fe]
+        end
+      }
+    } # each frame
+    # compute overall confusability
+    # c = \sum{fr \in frames} f(fr)/N * c(fr)
+    #
+    # where N is the number of FE occurrences overall
+    counts_overall = 0
+    counts_frame.each_value { |count|
+      counts_overall += count
+    }
+    @overall_confusability = 0.0
+    counts_frame.each_pair { |frame, count|
+      @overall_confusability += (count.to_f / counts_overall.to_f) * @frame_confusability[frame]
+    }
+  end
+  # return a copy of @counts_fe_glob, from which all fes with less than 5 occurrences are deleted
+  def get_global_counts
+    global_counts = @counts_fe_glob.clone
+    global_counts.delete_if {|key, value| value < 5}
+    return global_counts
+  end
+  ###
+  #
+  # compute sparseness statistics over the set of
+  # base events used for computing the confusability
+  # returns an array of length 4:
+  # - number of events with freq 1
+  # - number of events with freq 2
+  # - number of events with freq 3-5
+  # - number of events with freq > 5
+  def counts()
+    counts = [0,0,0,0]
+    @counts_gffe_glob.each_value {|freq|
+      case freq
+      when 1
+        counts[0] += 1
+      when 2
+        counts[1] += 1
+      when 3..5
+        counts[2] += 1
+      else
+        counts[3] += 1
+      end
+    }
+    return counts
+  end
+  def to_file(filename)
+    begin
+      file = File.new(filename,"w")
+    rescue
+      raise "Couldn't open file #{filename} for writing."
+    end
+    Marshal.dump({"confusability" => @confusability,
+                  "counts_fe_glob" => @counts_fe_glob,
+                  "counts_gffe_glob" => @counts_gffe_glob,
+                  "frame_confusability" => @frame_confusability,
+                  "overall_confusability" => @overall_confusability
+                 },
+                 file)
+  end
+  def from_file(filename)
+    begin
+      file = File.new(filename)
+    rescue
+      raise "Couldn't open file #{filename} for reading."
+    end
+    hash = Marshal.load(file)
+    @confusability = hash["confusability"]
+    @counts_fe_glob = hash["counts_fe_glob"]
+    @counts_gffe_glob = hash["counts_gffe_glob"]
+    @frame_confusability = hash["frame_confusability"]
+    @overall_confusability =  hash["overall_confusability"]
+  end
+end

data/lib/rosy/RosyEval.rb ADDED Viewed

@@ -0,0 +1,465 @@
+# RosyEval
+# KE May 05
+#
+# Evaluation for Rosy:
+# Precision, Recall, F-score
+# Output to evaluation file,
+# plus optional output of evaluation log file.
+#
+# Builds on the general Salsa Eval package
+# Salsa packages
+require "common/Eval"
+require "common/ruby_class_extensions"
+# Rosy packages
+require "rosy/RosyIterator"
+require "rosy/RosySplit"
+require "rosy/RosyTask"
+require "rosy/RosyPruning"
+# Frprep packages
+require "common/FrPrepConfigData"
+#######################################################################
+# This class is a subclass of the general evaluation class
+# Eval, which makes evaluation results accessible via
+# readable object variables
+#
+# step: can be argrec, arglab, onestep, as usual, but also
+#       - "all":
+#          evaluate argrec and arglab together.
+#          When argrec == NONE, use the argrec value, else use the arglab value
+#       - "prune":
+#          evaluate the pruning column as if it were an argrec assignment
+#
+# When step == argrec or prune, evaluate _only_ the target class FE
+# Otherwise, evaluate all target classes
+# Rosy-specific evaluation: supplies the Eval superclass with the groups
+# and the (gold label, assigned label) pairs to evaluate, read from the
+# views that a RosyIterator provides.
+class RosyEval < Eval
+  # Sets up the columns to read, the iterator and (for split data) the
+  # failed-parses record. Exits the process with status 1 if the
+  # preprocessing experiment file or the classifier run cannot be determined.
+  # Raises a RuntimeError for step "prune" if pruning is not switched on.
+  def initialize(exp,      # RosyConfigData object: experiment file
+                 ttt_obj,  # RosyTrainingTestTable object
+                 step,     # string: argrec, arglab, onestep, all, prune
+                 splitID,  # string: splitlog ID, or nil
+                 testID,   # string: test ID, or nil
+                 outfilename, # string: name of file to print output to
+                 logfilename, # string: name of file to print eval log to (may be nil)
+                 dont_adjoin_frprep_exp) # string: if non-nil, don't re-adjoin frprep experiment obj
+    @exp = exp
+    @step = step
+    if outfilename
+      $stderr.puts "Rosy evaluation: printing results to " + outfilename
+    end
+    if logfilename
+      $stderr.puts "and printing an evaluation log to " + logfilename
+    end
+    ##
+    # add preprocessing information to the experiment file object
+    unless dont_adjoin_frprep_exp
+      # split data stem from the training data, everything else is test data
+      preproc_param = splitID ? "preproc_descr_file_train" : "preproc_descr_file_test"
+      preproc_expname = @exp.get(preproc_param)
+      if not(preproc_expname)
+        $stderr.puts "Please set the name of the preprocessing exp. file name"
+        $stderr.puts "in the experiment file."
+        exit 1
+      elsif not(File.readable?(preproc_expname))
+        # name the parameter that was actually read
+        $stderr.puts "Error in the experiment file:"
+        $stderr.puts "Parameter #{preproc_param} has to be a readable file."
+        exit 1
+      end
+      preproc_exp = FrPrepConfigData.new(preproc_expname)
+      @exp.adjoin(preproc_exp)
+    end
+    ##
+    # evaluate which labels?
+    if ["argrec", "prune"].include? @step
+      # evaluate only the label "FE"
+      super(outfilename, logfilename, "FE")
+    else
+      # evaluate all target classes
+      super(outfilename, logfilename)
+    end
+    ##
+    # what are classifier columns?
+    case @step
+    when "all"
+      # read one argrec and one arglab classifier run column
+      @classif_column_argrec = ttt_obj.existing_runlog("argrec", "test", testID,splitID)
+      @classif_column_arglab = ttt_obj.existing_runlog("arglab", "test", testID,splitID)
+      @columns = ["gold", @classif_column_argrec, @classif_column_arglab]
+      if @classif_column_argrec.nil? || @classif_column_arglab.nil?
+        abort_run_not_found(ttt_obj, testID, splitID)
+      end
+    when "prune"
+      # read pruning column, evaluate as a kind of argrec assignment
+      unless Pruning.prune?(@exp)
+        raise "Error: Pruning evaluation without pruning column. Skipping."
+      end
+      @classif_column = Pruning.colname(@exp)
+      @columns = ["gold", @classif_column]
+    else
+      # read the classifier run column for the current step
+      @classif_column = ttt_obj.existing_runlog(@step, "test", testID,splitID)
+      @columns = ["gold", @classif_column]
+      if @classif_column.nil?
+        abort_run_not_found(ttt_obj, testID, splitID)
+      end
+    end
+    ##
+    # make object for iterating through groups and making views
+    case @step
+    when "all"
+      # all: no step in particular
+      @iterator = RosyIterator.new(ttt_obj, exp, "test",
+                                   "step" => nil,
+                                   "testID" => testID,
+                                   "splitID" => splitID,
+                                   "xwise" => "frame")
+    when "prune"
+      # prune: use argrec
+      @iterator = RosyIterator.new(ttt_obj, exp, "test",
+                                   "step" => "argrec",
+                                   "testID" => testID,
+                                   "splitID" => splitID)
+    else
+      # use the given step
+      @iterator = RosyIterator.new(ttt_obj, exp, "test",
+                                   "step" => @step,
+                                   "testID" => testID,
+                                   "splitID" => splitID)
+    end
+    ##
+    # xwise
+    if @step == "all"
+      # argrec and arglab may have different xwises,
+      # which would create trouble.
+      # just use "frame" instead
+      @xwise = ["frame"]
+    else
+      # evaluate as you have trained and tested
+      @xwise = @iterator.get_xwise_column_names()
+    end
+    ##
+    # split? then include FE labels from unparsed sentences
+    # in count of gold labels
+    if splitID
+      # get a FailedParses object for this split
+      @failed_parses_split = FailedParses.new()
+      fp_filename = File.new_filename(@exp.instantiate("rosy_dir",
+                                                       "exp_ID" => @exp.get("experiment_ID")),
+                                      @exp.instantiate("failed_file",
+                                                       "exp_ID" => @exp.get("experiment_ID"),
+                                                       "split_ID" => splitID,
+                                                       "dataset" => "test"))
+      @failed_parses_split.load(fp_filename)
+    end
+    # announce the task
+    $stderr.puts "---------"
+    $stderr.print "Rosy experiment #{@exp.get("experiment_ID")}: Evaluating "
+    if splitID
+      $stderr.puts "on split dataset #{splitID}"
+    else
+      $stderr.puts "on test dataset #{testID}"
+    end
+    $stderr.puts "---------"
+  end
+  ###
+  protected
+  ###
+  # each_group
+  #
+  # yield each group name in turn
+  #
+  # While a group is being yielded, @view holds the view for that group;
+  # the view is closed again when the block returns (or raises).
+  def each_group()
+    @view = nil
+    # for the sake of the failed parses module:
+    # it can split the failed parses by frame, target and target_pos,
+    # but if our "xwise" splits the data along any further columns,
+    # the failed parses module cannot know how to split up its failed parses.
+    # so see whether we've got any column names besides the three named above
+    # in our xwise,
+    # and if so, count the groups and split the failed parses evenly between them
+    normal_xwise_cols = ["frame", "target", "target_pos"] & @xwise
+    extra_xwise_cols = @xwise - normal_xwise_cols
+    # key describing the normal xwise values of a group:
+    # strings <col_name>=<value> joined by commas,
+    # with the column names in alphabetical order
+    normal_xwise_key = lambda { |group_descr_hash|
+      normal_xwise_cols.sort.map { |col_name|
+        col_name + "=" + group_descr_hash[col_name]
+      }.join(",")
+    }
+    # num_groups_for_normalxwise: hash: normal_xwise_values(string) -> num. of
+    #  groups with these normal xwise values(integer)
+    num_groups_for_normalxwise = Hash.new(0)
+    unless extra_xwise_cols.empty?
+      # we do have extra columns
+      # for each value sequence for normal_xwise_cols: find out how many values
+      # of extra xwise col.s there are
+      @iterator.each_group() { |group_descr_hash, group_name|
+        num_groups_for_normalxwise[normal_xwise_key.call(group_descr_hash)] += 1
+      }
+    end
+    @iterator.each_group() { |group_descr_hash, group_name|
+      if @exp.get("verbose")
+        $stderr.puts group_name
+      end
+      # construct view for the current group
+      @view = @iterator.get_a_view_for_current_group(@columns)
+      ##
+      # get counts of FE labels from unparsed sentences:
+      # first take apart the group label to find
+      # the frame name, target name, target POS name in this group
+      # (all but one may be nil)
+      frame = target = target_pos = nil
+      # get a description of this group, array of pairs [column name, value]
+      # where column name is the name of one database column
+      @xwise.interleave(group_name.split()).each { |col_name, col_value|
+        case col_name
+        when "frame"
+          frame = col_value
+        when "target"
+          target = col_value
+        when "target_pos"
+          target_pos = col_value
+        else
+          # additional database columns: handled below
+        end
+      }
+      # do we have additional column names in "xwise", besides 'frame', 'target', 'target_pos'?
+      if extra_xwise_cols.empty?
+        split_between_groups = 1
+      else
+        split_between_groups = num_groups_for_normalxwise[normal_xwise_key.call(group_descr_hash)]
+        # sanity check
+        if split_between_groups == 0
+          raise "shouldn't be here"
+        end
+      end
+      # failed_fes returns: hash that maps FE names [String] onto numbers of failed FEs [Int]
+      if @failed_parses_split
+        @failed_parses_split.failed_fes(frame, target, target_pos).each_pair { |fe, count|
+          # add this number of gold labels we failed to find
+          # to the number of gold labels that Eval counts
+          # if argrec, map all non-NONE FEs to "FE"
+          # NOTE(review): step "prune" also evaluates only the label "FE"
+          # but is not mapped here -- confirm whether that is intended.
+          if @step == "argrec" && fe != @exp.get("noval")
+            fe = "FE"
+          end
+          inject_gold_counts(group_name, fe, (count.to_f / split_between_groups.to_f).round)
+        }
+      end
+      begin
+        # yield the name of the group to the Eval object for evaluation
+        yield group_name
+      ensure
+        # release the view even if the evaluation of this group fails
+        @view.close()
+      end
+    }
+  end
+  ###
+  # each_instance
+  #
+  # given a group name, yield each instance of this group in turn,
+  # or rather: yield pairs [gold_class(string), assigned_class(string)]
+  #
+  # this method depends on each_group() having been called before and
+  # having initialized @view to the right view object
+  def each_instance(group) # string: group name
+    case @step
+    when "all"
+      # step "all":
+      # if the argrec label is "NONE", use that as the assigned label.
+      # else use the arglab-label
+      @view.each_hash { |row|
+        if row[@classif_column_argrec] == @exp.get("noval")
+          yield [ row["gold"], row[@classif_column_argrec] ]
+        else
+          yield [ row["gold"], row[@classif_column_arglab] ]
+        end
+      }
+    when "prune"
+      # step "prune":
+      # if the pruning column has entry 1, regard as assignment "FE",
+      # else regard as assignment "NONE".
+      @view.each_hash { |row|
+        if row[@classif_column] == "1"
+          yield [ row["gold"], "FE" ]
+        else
+          yield [ row["gold"], @exp.get("noval") ]
+        end
+      }
+    else
+      # argrec, arglab, onestep:
+      # just yield pairs [goldlabel, classif_column_label]
+      # as given in the view
+      @view.each_hash { |row|
+        yield [row["gold"], row[@classif_column]]
+      }
+    end
+  end
+  ###
+  private
+  ###
+  # abort_run_not_found
+  #
+  # no classifier run found for the given specifications:
+  # list the runs known to ttt_obj on stderr and exit with status 1
+  def abort_run_not_found(ttt_obj, # RosyTrainingTestTable object
+                          testID,  # string: test ID, or nil
+                          splitID) # string: splitlog ID, or nil
+    $stderr.puts "Couldn't determine the run to evaluate."
+    $stderr.puts "There were either none or too many possible runs given your specification.\n"
+    $stderr.puts "Here is a list of all runs the system knows for this experiment ID:\n\n"
+    $stderr.puts ttt_obj.runlog_to_s("test", testID, splitID)
+    exit 1
+  end
+end
+###########################################################
+# This is the class to be called from rosy.rb
+###########################################################
+class RosyEvalTask < RosyTask
+  def initialize(exp,      # RosyConfigData object: experiment description
+		 opts,     # hash: runtime argument option (string) -> value (string)
+		 ttt_obj)  # RosyTrainingTestTable object
+    #####
+    # In enduser mode, this whole task is unavailable
+    in_enduser_mode_unavailable()
+    @exp = exp
+    @ttt_obj = ttt_obj
+    ##
+    # check runtime options
+    @step = "both"
+    @splitID = nil
+    @testID = default_test_ID()
+    opts.each do |opt,arg|
+      case opt
+      when "--step"
+	unless ["argrec", "arglab", "both", "onestep"].include? arg
+	  raise "Classification step must be one of: argrec, arglab, both, onestep. I got: " + arg.to_s
+	end
+	@step = arg
+      when "--logID"
+	@splitID = arg
+      when "--testID"
+	@testID = arg
+      else
+	# this is an option that is okay but has already been read and used by rosy.rb
+      end
+    end
+  end
+  def perform()
+    dont_adjoin_frprep_exp = nil
+    original_step = @step
+    if ["both", "argrec", "onestep"].include? original_step and
+        Pruning.prune?(@exp)
+      # evaluate pruning
+      $stderr.puts "Rosy evaluating pruning"
+      @step = "prune"
+      perform_aux()
+      dont_adjoin_frprep_exp = "dont_adjoin_frprep_exp"
+    end
+    if original_step == "both"
+      # both? then do first argrec, then arglab
+      $stderr.puts "Rosy evaluating step argrec"
+      @step = "argrec"
+      perform_aux(dont_adjoin_frprep_exp)
+      $stderr.puts "Rosy evaluating step arglab"
+      @step = "arglab"
+      perform_aux("dont_adjoin_frprep_exp")
+# KE Jan 30, 2006: evaluation "all" deactivated until we've
+# figured out how to evaluate accuracy for the NONE class
+#      $stderr.puts "Rosy overall evaluation"
+#      @step = "all"
+#      perform_aux("dont_adjoin_frprep_exp")
+    else
+      # not both? then just do one
+      @step = original_step
+      perform_aux(dont_adjoin_frprep_exp)
+    end
+  end
+  ###############3
+  private
+  # perform_aux: do the actual work of the perform() method
+  # moved here because of the possibility of having @step=="both",
+  # which makes it necessary to perform two eval steps one after the other
+  def perform_aux(dont_adjoin_frprep_exp = nil)  # string passed on to RosyEval initialize method
+    # construct names for evaluation output file
+    # and evaluation log file (which classifies each instances as correct/incorrect/unassigned)
+    if @splitID
+      outfilename_id = "split" + @splitID
+    else
+      outfilename_id = "test" + @testID
+    end
+    @outfilename = File.new_filename(@exp.instantiate("rosy_dir",
+                                                      "exp_ID" => @exp.get("experiment_ID")),
+                                     @exp.instantiate("eval_file",
+                                                      "exp_ID" => @exp.get("experiment_ID"),
+                                                      "test_ID" => outfilename_id,
+                                                      "step" => @step))
+    if @exp.get("print_eval_log")
+      @logfilename = File.new_filename(@exp.instantiate("rosy_dir",
+                                                        "exp_ID" => @exp.get("experiment_ID")),
+                                       @exp.instantiate("log_file",
+                                                        "exp_ID" => @exp.get("experiment_ID"),
+                                                        "test_ID" => outfilename_id,
+                                                        "step" => @step))
+    else
+      @logfilename = nil
+    end
+    @eval_obj = RosyEval.new(@exp, @ttt_obj, @step, @splitID, @testID,
+                             @outfilename, @logfilename,
+                             dont_adjoin_frprep_exp)
+    @eval_obj.compute()
+  end
+end