RubyGems - shalmaneser-rosy - Versions diffs - 1.2.0.rc4 → 1.2.rc5 - Mend

shalmaneser-rosy 1.2.0.rc4 → 1.2.rc5

Files changed (41)

checksums.yaml +4 -4
data/README.md +47 -18
data/bin/rosy +14 -7
data/lib/rosy/FailedParses.rb +22 -20
data/lib/rosy/FeatureInfo.rb +35 -31
data/lib/rosy/GfInduce.rb +132 -130
data/lib/rosy/GfInduceFeature.rb +86 -68
data/lib/rosy/InputData.rb +59 -55
data/lib/rosy/RosyConfusability.rb +47 -40
data/lib/rosy/RosyEval.rb +55 -55
data/lib/rosy/RosyFeatureExtractors.rb +295 -290
data/lib/rosy/RosyFeaturize.rb +54 -67
data/lib/rosy/RosyInspect.rb +52 -50
data/lib/rosy/RosyIterator.rb +73 -67
data/lib/rosy/RosyPhase2FeatureExtractors.rb +48 -48
data/lib/rosy/RosyPruning.rb +39 -31
data/lib/rosy/RosyServices.rb +116 -115
data/lib/rosy/RosySplit.rb +55 -53
data/lib/rosy/RosyTask.rb +7 -3
data/lib/rosy/RosyTest.rb +174 -191
data/lib/rosy/RosyTrain.rb +46 -50
data/lib/rosy/RosyTrainingTestTable.rb +101 -99
data/lib/rosy/TargetsMostFrequentFrame.rb +13 -9
data/lib/rosy/{AbstractFeatureAndExternal.rb → abstract_feature_extractor.rb} +22 -97
data/lib/rosy/abstract_single_feature_extractor.rb +52 -0
data/lib/rosy/external_feature_extractor.rb +35 -0
data/lib/rosy/opt_parser.rb +231 -201
data/lib/rosy/rosy.rb +63 -64
data/lib/rosy/rosy_conventions.rb +66 -0
data/lib/rosy/rosy_error.rb +15 -0
data/lib/rosy/var_var_restriction.rb +16 -0
data/lib/shalmaneser/rosy.rb +1 -0
metadata +26 -19
data/lib/rosy/ExternalConfigData.rb +0 -58
data/lib/rosy/View.rb +0 -418
data/lib/rosy/rosy_config_data.rb +0 -121
data/test/frprep/test_opt_parser.rb +0 -94
data/test/functional/functional_test_helper.rb +0 -58
data/test/functional/test_fred.rb +0 -47
data/test/functional/test_frprep.rb +0 -99
data/test/functional/test_rosy.rb +0 -40

data/lib/rosy/RosyConfusability.rb CHANGED Viewed

@@ -6,15 +6,15 @@
 # for the data in the (training) database there.
 #
 # We define confusability as follows:
-# Given a frame fr, let
+# Given a frame fr, let
 # - fes(fr) the FEs of fr (a set)
 # - gfs(fe) the grammatical functions realizing the FE fe in the data
 # - gfs(fr) = U_{fe \in fes(fr)} gfs(fe) the grammatical functions realizing roles of fr
-#
+#
 # Then the entropy of a grammatical function gf within fr is
 #
 # gfe_{fr}(gf) = \sum_{fe \in fes(fr)} -p(fe|gf) log p(fe|gf)
-#
+#
 # where p(fe|gf) = f(gf, fe) / f(gf)
 #
 # And the confusability of a frame element fe of fr is
@@ -23,16 +23,21 @@
 #
 # where p(gf|fe) = f(gf, fe) / f(fe)
+# @todo This require statement is wrong. This file is not read in.
 require "RosyConfigData"
 require "RosyIterator"
 require "RosyConventions"
 require "TargetsMostFrequentFrame"
+# This is WRONG!!!!
+# @todo Remove this!
 require "mysql"
+module Shalmaneser
+module Rosy
 class RosyConfusability
   include TargetsMostFrequentSc
   attr_reader :confusability, :counts_fe_glob, :frame_confusability, :overall_confusability
   def initialize(exp) # RosyConfigData object
@@ -53,14 +58,14 @@ class RosyConfusability
       "Obj_Comp", "Obj", "Mod_Head", "Ext_Comp_Obj",
       "Gen_Head", "Ext_Gen_Mod"
       # with duplicates
-#       "Ext_Comp", "Mod", "Comp", "Gen",
-#       "Ext_Obj", "Ext", "", "Ext_Obj_Comp",
-#       "Ext_Comp_Comp", "Head", "Mod_Mod", "Gen_Mod",
-#       "Ext_Mod", "Comp_Comp", "Mod_Comp", "Ext_Gen",
-#       "Gen_Comp", "Head_Head", "Ext_Comp_Comp_Comp", "Head_Comp",
+#       "Ext_Comp", "Mod", "Comp", "Gen",
+#       "Ext_Obj", "Ext", "", "Ext_Obj_Comp",
+#       "Ext_Comp_Comp", "Head", "Mod_Mod", "Gen_Mod",
+#       "Ext_Mod", "Comp_Comp", "Mod_Comp", "Ext_Gen",
+#       "Gen_Comp", "Head_Head", "Ext_Comp_Comp_Comp", "Head_Comp",
 # # "Ext_Ext_Comp",
 # #       "Ext_Obj_Comp_Comp", "Obj_Comp",
-# #       "Ext_Mod_Mod", "Comp_Comp_Comp",
+# #       "Ext_Mod_Mod", "Comp_Comp_Comp",
 # #       "Ext_Ext_Obj", "Ext_Mod_Comp", "Comp_Ext", "Obj",
 # #       "Ext_Ext", "Ext_Obj_Obj", "Mod_Mod_Mod", "Gen_Mod_Mod",
 # #       "Ext_Comp_Comp_Comp_Comp", "Gen_Head", "Mod_Head",
@@ -74,29 +79,29 @@ class RosyConfusability
               additionals) # array:string: "target", "target_pos", "gframe", "fgframe"
     ###
     # open and initialize stuff:
     # open database
-    database = Mysql.real_connect(@exp.get('host'), @exp.get('user'),
+    database = Mysql.real_connect(@exp.get('host'), @exp.get('user'),
                                   @exp.get('passwd'), @exp.get('dbname'))
     # make an object that creates views.
     # read one frame at a time.
     iterator = RosyIterator.new(database, @exp, "train",
-                                "splitID" => splitID,
+                                "splitID" => splitID,
                                 "xwise" => "frame")
     # get value for "no val"
     noval = @exp.get("noval")
     counts_frame = Hash.new(0)
     # iterate through all frames and compute confusability of each FE
     iterator.each_group { |group_descr_hash, frame|
       $stderr.puts "Computing confusability for #{frame}"
       # read all instances of the frame, columns: FE and GF
       view = iterator.get_a_view_for_current_group(["sentid","gold", "fn_gf",
                                                     "target","target_pos", "frame"])
       if additionals.include? "tmfframe"
         # find most frequent gframe for each target
         tmfframe = determine_target_most_frequent_sc(view, noval)
@@ -110,7 +115,7 @@ class RosyConfusability
       view.each_sentence { |sentence|
         # make string consisting of all FN GFs of this sentence
-        allgfs = Array.new()
+        allgfs = []
         sentence.each { |inst|
           if inst["fn_gf"] != noval
             allgfs << inst["fn_gf"]
@@ -132,13 +137,13 @@ class RosyConfusability
             # don't count target among the FEs
             next
           end
           if row["gold"] != noval
             counts_fe[row["gold"]] += 1
           end
           if row["fn_gf"] != noval and row["fn_gf"] != "target"
             gf = row["fn_gf"]
             additionals.each { |additional|
               case additional
               when "target"
@@ -154,7 +159,7 @@ class RosyConfusability
                 @frequent_gframes.each { |fgframe|
                   if fgframe.subsumed_by?(allgfs)
                     # fgframe is a subset of allgfs
-                    if maxfgf.nil? or fgframe.length() > maxfgf.length()
+                    if maxfgf.nil? or fgframe.length > maxfgf.length
                       maxfgf = fgframe
                     end
                   end
@@ -176,26 +181,26 @@ class RosyConfusability
             counts_gf[gf] += 1
           end
           if row["gold"] != noval and gf
             counts_gffe[gf + " " + row["gold"]] += 1
           end
         } # each row of sentence
       } # each sentence of view
       # compute gf entropy
       # gfe_{fr}(gf) = \sum_{fe \in fes(fr)} -p(fe|gf) log_2 p(fe|gf)
-      #
+      #
       # where p(fe|gf) = f(gf, fe) / f(gf)
-      gf_entropy = Hash.new
+      gf_entropy = {}
       counts_gf.keys.each { |gf|
         gf_entropy[gf] = 0.0
         counts_fe.keys.each { |fe|
           if counts_gf[gf] > 0
             p_gf_fe = counts_gffe[gf + " " + fe].to_f / counts_gf[gf].to_f
             # get log_2 via log_10
             if p_gf_fe > 0.0
               gf_entropy[gf] -= p_gf_fe * Math.log10(p_gf_fe) * 3.32193
@@ -203,18 +208,18 @@ class RosyConfusability
           end
         } # each FE for this GF
       } # each GF (gf entropy)
       # compute FE confusability
       # c_{fr}(fe) = \sum_{gf \in gfs(fr)} p(gf|fe) gfe_{fr}(gf)
       #
       # where p(gf|fe) = f(gf, fe) / f(fe)
       counts_fe.keys.each { |fe|
         @confusability[frame + " " + fe] = 0.0
         counts_gf.keys.each { |gf|
           if counts_fe[fe] > 0
             p_fe_gf = counts_gffe[gf + " " + fe].to_f / counts_fe[fe].to_f
             @confusability[frame + " " + fe] += p_fe_gf * gf_entropy[gf]
           end
         } # each GF for this FE
@@ -239,7 +244,7 @@ class RosyConfusability
       # compute overall frame confusability
       # omitting rare FEs with below 5 occurrences:
-      #
+      #
       # c(fr) = sum_{fe \in fes(fr)} f(fe)/f(fr) * c_{fr}(fe)
       #       = \sum_{gf \in gfs(fr)} p(gf|fr) gfe_{fr}(gf)
       #
@@ -257,7 +262,7 @@ class RosyConfusability
         end
       }
     } # each frame
     # compute overall confusability
     # c = \sum{fr \in frames} f(fr)/N * c(fr)
     #
@@ -271,26 +276,26 @@ class RosyConfusability
       @overall_confusability += (count.to_f / counts_overall.to_f) * @frame_confusability[frame]
     }
   end
   # return a copy of @counts_fe_glob, from which all fes with less than 5 occurrences are deleted
-  def get_global_counts
+  def get_global_counts
     global_counts = @counts_fe_glob.clone
     global_counts.delete_if {|key, value| value < 5}
     return global_counts
   end
   ###
   #
-  # compute sparseness statistics over the set of
+  # compute sparseness statistics over the set of
   # base events used for computing the confusability
   # returns an array of length 4:
   # - number of events with freq 1
   # - number of events with freq 2
   # - number of events with freq 3-5
   # - number of events with freq > 5
-  def counts()
+  def counts
     counts = [0,0,0,0]
     @counts_gffe_glob.each_value {|freq|
       case freq
@@ -306,7 +311,7 @@ class RosyConfusability
     }
     return counts
   end
   def to_file(filename)
     begin
       file = File.new(filename,"w")
@@ -321,7 +326,7 @@ class RosyConfusability
                  },
                  file)
   end
   def from_file(filename)
     begin
       file = File.new(filename)
@@ -336,3 +341,5 @@ class RosyConfusability
     @overall_confusability =  hash["overall_confusability"]
   end
 end
+end
+end

data/lib/rosy/RosyEval.rb CHANGED Viewed

@@ -9,40 +9,42 @@
 # Builds on the general Salsa Eval package
 # Salsa packages
-require "common/Eval"
-require "common/ruby_class_extensions"
+require 'eval'
+require "ruby_class_extensions"
 # Rosy packages
 require "rosy/RosyIterator"
 require "rosy/RosySplit"
 require "rosy/RosyTask"
 require "rosy/RosyPruning"
+require 'rosy/rosy_conventions'
-# Frprep packages
-require "common/prep_config_data"
+require 'configuration/frappe_config_data'
 #######################################################################
 # This class is a subclass of the general evaluation class
 # Eval, which makes evaluation results readable via
 # readable object variables
 #
-# step: can be argrec, arglab, onestep, as usual, but also
+# step: can be argrec, arglab, onestep, as usual, but also
 #       - "all":
 #          evaluate argrec and arglab together.
 #          When argrec == NONE, use the argrec value, else use the arglab value
-#       - "prune":
+#       - "prune":
 #          evaluate the pruning column as if it were an argrec assignment
 #
 # When step == argrec or prune, evaluate _only_ the target class FE
 # Otherwise, evaluate all target classes
+module Shalmaneser
+module Rosy
 class RosyEval < Eval
   def initialize(exp,      # RosyConfigData object: experiment file
-		 ttt_obj,  # RosyTrainingTestTable object
-		 step,     # string: argrec, arglab, onestep, all, prune
-		 splitID,  # string: splitlog ID, or nil
-		 testID,   # string: test ID, or nil
-		 outfilename, # string: name of file to print output to
-		 logfilename, # string: name of file to print eval log to (may be nil)
+                 ttt_obj,  # RosyTrainingTestTable object
+                 step,     # string: argrec, arglab, onestep, all, prune
+                 splitID,  # string: splitlog ID, or nil
+                 testID,   # string: test ID, or nil
+                 outfilename, # string: name of file to print output to
+                 logfilename, # string: name of file to print eval log to (may be nil)
                  dont_adjoin_frprep_exp) # string: if non-nil, don't re-adjoin frprep experiment obj
     @exp = exp
     @step = step
@@ -50,7 +52,7 @@ class RosyEval < Eval
     if outfilename
       $stderr.puts "Rosy evaluation: printing results to " + outfilename
     end
-    if logfilename
+    if logfilename
      $stderr.puts "and printing an evaluation log to " + logfilename
     end
@@ -73,7 +75,7 @@ class RosyEval < Eval
         $stderr.puts "Parameter preproc_descr_file_train has to be a readable file."
         exit 1
       end
-      preproc_exp = FrPrepConfigData.new(preproc_expname)
+      preproc_exp = ::Shalmaneser::Configuration::FrappeConfigData.new(preproc_expname)
       @exp.adjoin(preproc_exp)
     end
@@ -104,7 +106,7 @@ class RosyEval < Eval
         $stderr.puts ttt_obj.runlog_to_s("test", testID, splitID)
         exit 1
       end
     when "prune"
       # read pruning column, evaluate as a kind of argrec assignment
       unless Pruning.prune?(@exp)
@@ -127,29 +129,29 @@ class RosyEval < Eval
         exit 1
       end
     end
     ##
     # make object for iterating through groups and making views
     case @step
     when "all"
       # all: no step in particular
-      @iterator = RosyIterator.new(ttt_obj, exp, "test",
-                                   "step" => nil,
-                                   "testID" => testID,
+      @iterator = RosyIterator.new(ttt_obj, exp, "test",
+                                   "step" => nil,
+                                   "testID" => testID,
                                    "splitID" => splitID,
                                    "xwise" => "frame")
     when "prune"
       # prune: use argrec
-      @iterator = RosyIterator.new(ttt_obj, exp, "test",
-                                   "step" => "argrec",
-                                   "testID" => testID,
+      @iterator = RosyIterator.new(ttt_obj, exp, "test",
+                                   "step" => "argrec",
+                                   "testID" => testID,
                                    "splitID" => splitID)
     else
       # use the given step
-      @iterator = RosyIterator.new(ttt_obj, exp, "test",
-                                   "step" => @step,
-                                   "testID" => testID,
+      @iterator = RosyIterator.new(ttt_obj, exp, "test",
+                                   "step" => @step,
+                                   "testID" => testID,
                                    "splitID" => splitID)
     end
@@ -162,7 +164,7 @@ class RosyEval < Eval
       @xwise = ["frame"]
     else
       # evaluate as you have trained and tested
-      @xwise = @iterator.get_xwise_column_names()
+      @xwise = @iterator.get_xwise_column_names
     end
     ##
@@ -170,7 +172,7 @@ class RosyEval < Eval
     # in count of gold labels
     if splitID
       # get a FailedParses object for this split
-      @failed_parses_split = FailedParses.new()
+      @failed_parses_split = FailedParses.new
       fp_filename = File.new_filename(@exp.instantiate("rosy_dir",
                                                   "exp_ID" => @exp.get("experiment_ID")),
                                  @exp.instantiate("failed_file",
@@ -198,7 +200,7 @@ class RosyEval < Eval
   # each_group
   #
   # yield each group name in turn
-  def each_group()
+  def each_group
     @view = nil
@@ -224,7 +226,7 @@ class RosyEval < Eval
       # for each value sequence for normal_xwise_cols: find out how many values
       # of extra xwise col.s there are
-      @iterator.each_group() { |group_descr_hash, group_name|
+      @iterator.each_group { |group_descr_hash, group_name|
         # make the hash key
         key = normal_xwise_cols.sort.map { |col_name|
@@ -236,7 +238,7 @@ class RosyEval < Eval
       }
     end
-    @iterator.each_group() { |group_descr_hash, group_name|
+    @iterator.each_group { |group_descr_hash, group_name|
       if @exp.get("verbose")
         $stderr.puts group_name
@@ -255,7 +257,7 @@ class RosyEval < Eval
       # get a description of this group, array of pairs [column name, value]
       # where column name is the name of one database column
-      @xwise.interleave(group_name.split()).each { |col_name, col_value|
+      @xwise.interleave(group_name.split).each { |col_name, col_value|
         case col_name
         when "frame"
           frame = col_value
@@ -283,7 +285,7 @@ class RosyEval < Eval
         end
       end
-      # failed_fes returns: hash that maps FE names [String] onto numbers of failed FEs [Int]
+      # failed_fes returns: hash that maps FE names [String] onto numbers of failed FEs [Int]
       if @failed_parses_split
         @failed_parses_split.failed_fes(frame, target, target_pos).each_pair { |fe, count|
           # add this number of gold labels we failed to find
@@ -299,7 +301,7 @@ class RosyEval < Eval
       # yield the name of the group to the Eval object for evaluation
       yield group_name
-      @view.close()
+      @view.close
     }
   end
@@ -324,7 +326,7 @@ class RosyEval < Eval
           yield [ row["gold"], row[@classif_column_arglab] ]
         end
       }
     when "prune"
       # step "prune":
       # if the pruning column has entry 1, regard as assignment "FE",
@@ -346,7 +348,7 @@ class RosyEval < Eval
         yield [row["gold"], row[@classif_column]]
       }
     end
   end
 end
@@ -356,12 +358,8 @@ end
 class RosyEvalTask < RosyTask
   def initialize(exp,      # RosyConfigData object: experiment description
-		 opts,     # hash: runtime argument option (string) -> value (string)
-		 ttt_obj)  # RosyTrainingTestTable object
-    #####
-    # In enduser mode, this whole task is unavailable
-    in_enduser_mode_unavailable()
+                 opts,     # hash: runtime argument option (string) -> value (string)
+                 ttt_obj)  # RosyTrainingTestTable object
     @exp = exp
     @ttt_obj = ttt_obj
@@ -370,26 +368,26 @@ class RosyEvalTask < RosyTask
     # check runtime options
     @step = "both"
     @splitID = nil
-    @testID = default_test_ID()
+    @testID = Rosy.default_test_ID
     opts.each do |opt,arg|
       case opt
       when "--step"
-	unless ["argrec", "arglab", "both", "onestep"].include? arg
-	  raise "Classification step must be one of: argrec, arglab, both, onestep. I got: " + arg.to_s
-	end
-	@step = arg
+        unless ["argrec", "arglab", "both", "onestep"].include? arg
+          raise "Classification step must be one of: argrec, arglab, both, onestep. I got: " + arg.to_s
+        end
+        @step = arg
       when "--logID"
-	@splitID = arg
+        @splitID = arg
       when "--testID"
-	@testID = arg
+        @testID = arg
       else
-	# this is an option that is okay but has already been read and used by rosy.rb
+        # this is an option that is okay but has already been read and used by rosy.rb
       end
     end
   end
-  def perform()
+  def perform
     dont_adjoin_frprep_exp = nil
     original_step = @step
@@ -398,7 +396,7 @@ class RosyEvalTask < RosyTask
       # evaluate pruning
       $stderr.puts "Rosy evaluating pruning"
       @step = "prune"
-      perform_aux()
+      perform_aux
       dont_adjoin_frprep_exp = "dont_adjoin_frprep_exp"
     end
@@ -408,7 +406,7 @@ class RosyEvalTask < RosyTask
       @step = "argrec"
       perform_aux(dont_adjoin_frprep_exp)
       $stderr.puts "Rosy evaluating step arglab"
       @step = "arglab"
       perform_aux("dont_adjoin_frprep_exp")
@@ -438,7 +436,7 @@ class RosyEvalTask < RosyTask
     if @splitID
       outfilename_id = "split" + @splitID
     else
-      outfilename_id = "test" + @testID
+      outfilename_id = "test" + @testID
     end
     @outfilename = File.new_filename(@exp.instantiate("rosy_dir",
                                                       "exp_ID" => @exp.get("experiment_ID")),
@@ -457,9 +455,11 @@ class RosyEvalTask < RosyTask
     else
       @logfilename = nil
     end
-    @eval_obj = RosyEval.new(@exp, @ttt_obj, @step, @splitID, @testID,
+    @eval_obj = RosyEval.new(@exp, @ttt_obj, @step, @splitID, @testID,
                              @outfilename, @logfilename,
                              dont_adjoin_frprep_exp)
-    @eval_obj.compute()
+    @eval_obj.compute
   end
 end
+end
+end