RubyGems - shalmaneser - Versions diffs - 1.2.0.rc3 → 1.2.0.rc4 - Mend

shalmaneser 1.2.0.rc3 → 1.2.0.rc4

Files changed (47) hide show

checksums.yaml +4 -4
data/README.md +26 -7
data/bin/fred +2 -4
data/doc/exp_files.md +6 -5
data/lib/common/{ConfigData.rb → config_data.rb} +46 -270
data/lib/common/config_format_element.rb +220 -0
data/lib/common/prep_config_data.rb +62 -0
data/lib/common/{frprep_helper.rb → prep_helper.rb} +0 -0
data/lib/{common/DBInterface.rb → db/db_interface.rb} +2 -2
data/lib/{rosy/DBMySQL.rb → db/db_mysql.rb} +1 -2
data/lib/{rosy/DBSQLite.rb → db/db_sqlite.rb} +1 -1
data/lib/{rosy/DBTable.rb → db/db_table.rb} +1 -1
data/lib/{rosy/DBWrapper.rb → db/db_wrapper.rb} +0 -0
data/lib/{common/SQLQuery.rb → db/sql_query.rb} +0 -0
data/lib/fred/FredBOWContext.rb +8 -6
data/lib/fred/FredDetermineTargets.rb +1 -1
data/lib/fred/FredEval.rb +1 -1
data/lib/fred/FredFeaturize.rb +22 -16
data/lib/fred/FredTest.rb +0 -1
data/lib/fred/fred.rb +2 -0
data/lib/fred/{FredConfigData.rb → fred_config_data.rb} +70 -67
data/lib/fred/opt_parser.rb +1 -1
data/lib/frprep/frprep.rb +1 -1
data/lib/frprep/interfaces/berkeley_interface.rb +7 -9
data/lib/frprep/opt_parser.rb +1 -1
data/lib/rosy/ExternalConfigData.rb +1 -1
data/lib/rosy/RosyEval.rb +1 -1
data/lib/rosy/RosyFeaturize.rb +21 -20
data/lib/rosy/RosyInspect.rb +1 -1
data/lib/rosy/RosyPruning.rb +1 -1
data/lib/rosy/RosyServices.rb +1 -1
data/lib/rosy/RosySplit.rb +1 -1
data/lib/rosy/RosyTest.rb +23 -20
data/lib/rosy/RosyTrain.rb +15 -13
data/lib/rosy/RosyTrainingTestTable.rb +2 -1
data/lib/rosy/View.rb +1 -1
data/lib/rosy/opt_parser.rb +1 -1
data/lib/rosy/rosy.rb +1 -1
data/lib/rosy/rosy_config_data.rb +121 -0
data/lib/shalmaneser/opt_parser.rb +32 -2
data/lib/shalmaneser/version.rb +1 -1
metadata +23 -114
checksums.yaml.gz.sig +0 -0
data.tar.gz.sig +0 -0
data/lib/common/FrPrepConfigData.rb +0 -66
data/lib/rosy/RosyConfigData.rb +0 -115
metadata.gz.sig +0 -0

data/lib/fred/FredTest.rb CHANGED Viewed

@@ -16,7 +16,6 @@ require "common/SalsaTigerRegXML"
 require "common/ruby_class_extensions"
 # Shalmaneser packages
-require "common/FrPrepConfigData"
 require "common/ML"
 require "fred/Baseline"
 require "fred/FredConventions"

data/lib/fred/fred.rb CHANGED Viewed

@@ -34,6 +34,8 @@ module Fred
         task_obj = FredEval.new(@exp, @opts)
       else
         raise "Shouldn't be here"
+        # @todo AB: this <else> branch should be impossible;
+        #     handle it in OptionParser instead.
       end
       task_obj.compute

data/lib/fred/{FredConfigData.rb → fred_config_data.rb} RENAMED Viewed

@@ -4,7 +4,7 @@
 # Frame disambiguation system:
 # access to a configuration and experiment description file
-require "common/ConfigData"
+require "common/config_data"
 ##############################
 # Class FredConfigData
@@ -13,71 +13,73 @@ require "common/ConfigData"
 # sets variable names appropriate to WSD task
 class FredConfigData < ConfigData
-  def initialize(filename)
-    # initialize config data object
-    super(filename,          # config file
-	  {
-            "experiment_ID" => "string", # experiment ID
-            "enduser_mode" => "bool", # work in enduser mode? (disallowing many things)
-            "preproc_descr_file_train" => "string", # path to preprocessing files
-            "preproc_descr_file_test" => "string",
-            "directory_output" => "string", # path to Salsa/Tiger XML output directory
-            "verbose" => "bool" ,     # print diagnostic messages?
-            "apply_to_all_known_targets" => "bool", # apply to all known targets rather than the ones with a frame?
-            "fred_directory" => "string",# directory for internal info
-            "classifier_dir" => "string", # write classifiers here
-            "classifier" => "list",  # classifiers
-            "dbtype" => "string",    # "mysql" or "sqlite"
-            "host" => "string",      # DB access: sqlite only
-            "user" => "string",
-            "passwd" => "string",
-            "dbname" => "string",
+  CONFIG_DEFS = {
+    "experiment_ID" => "string", # experiment ID
+    "enduser_mode" => "bool", # work in enduser mode? (disallowing many things)
+    "preproc_descr_file_train" => "string", # path to preprocessing files
+    "preproc_descr_file_test" => "string",
+    "directory_output" => "string", # path to Salsa/Tiger XML output directory
+    "verbose" => "bool" ,     # print diagnostic messages?
+    "apply_to_all_known_targets" => "bool", # apply to all known targets rather than the ones with a frame?
+    "fred_directory" => "string",# directory for internal info
+    "classifier_dir" => "string", # write classifiers here
+    "classifier" => "list",  # classifiers
+    "dbtype" => "string",    # "mysql" or "sqlite"
+    "host" => "string",      # DB access: sqlite only
+    "user" => "string",
+    "passwd" => "string",
+    "dbname" => "string",
+    # featurization info
+    "feature" => "list",     # which features to use for the classifier?
+    "binary_classifiers" => "bool",# make binary rather than n-ary classifiers?
+    "negsense" => "string",  # binary classifier: negative sense is..?
+    "numerical_features" => "string", # do what with numerical features?
+    # what to do with items that have multiple senses?
+    # 'binarize': binary classifiers, and consider positive
+    #          if the sense is among the gold senses
+    # 'join' : make one joint sense
+    # 'repeat' : make multiple occurrences of the item, one sense per occ
+    # 'keep' : keep as separate labels
+    #
+    # multilabel: consider as assigned all labels
+    # above a certain confidence threshold?
+    "handle_multilabel" => "string",
+    "assignment_confidence_threshold" => "float",
+    # single-sentence context?
+    "single_sent_context" => "bool",
+    # noncontiguous input? then we need access to a larger corpus
+    "noncontiguous_input" => "bool",
+    "larger_corpus_dir" => "string",
+    "larger_corpus_format" => "string",
+    "larger_corpus_encoding" => "string",
+    # Imported from PrepConfigData
+    'do_postag' => 'bool',
+    'do_lemmatize' => 'bool',
+    'do_parse' => 'bool',
+    'pos_tagger' => 'string',
+    'lemmatizer' => 'string',
+    'parser' => 'string',
+    'directory_preprocessed' => 'string',
+    'language' => 'string'
+  }
-            # featurization info
-            "feature" => "list",     # which features to use for the classifier?
-            "binary_classifiers" => "bool",# make binary rather than n-ary clasifiers?
-	    "negsense" => "string",  # binary classifier: negative sense is..?
-            "numerical_features" => "string", # do what with numerical features?
-            # what to do with items that have multiple senses?
-            # 'binarize': binary classifiers, and consider positive
-            #          if the sense is among the gold senses
-            # 'join' : make one joint sense
-            # 'repeat' : make multiple occurrences of the item, one sense per occ
-            # 'keep' : keep as separate labels
-            #
-            # multilabel: consider as assigned all labels
-            # above a certain confidence threshold?
-            "handle_multilabel" => "string",
-            "assignment_confidence_threshold" => "float",
-            # single-sentence context?
-            "single_sent_context" => "bool",
+  def initialize(filename)
-            # noncontiguous input? then we need access to a larger corpus
-            "noncontiguous_input" => "bool",
-            "larger_corpus_dir" => "string",
-            "larger_corpus_format" => "string",
-            "larger_corpus_encoding" => "string"
-	  },
-	  [ # variables
-            "train",
-           "exp_ID"
-	  ]
-	  )
+    super(filename, CONFIG_DEFS, ["train", "exp_ID"])
     # set access functions for list features
-    set_list_feature_access("classifier",
-                            method("access_classifier"))
-    set_list_feature_access("feature",
-                            method("access_feature"))
+    set_list_feature_access("classifier", method("access_classifier"))
+    set_list_feature_access("feature", method("access_feature"))
   end
   ###
@@ -165,14 +167,15 @@ class FredConfigData < ConfigData
   #
   # returns: a list of pairs [feature_name(string), options(array:string)]
   # of defined features
-  def access_classifier(val_list) # array:array:string: list of tuples defined in config file
-		               # for feature 'feature'
+  # @param val_list [Array] array:array:string: list of tuples defined
+  #   in config file for feature 'classifier'
+  def access_classifier(val_list)
     if val_list.nil?
-      return []
+      []
     else
-      return val_list.map { |cl_descr_tuple|
+      val_list.map do |cl_descr_tuple|
         [cl_descr_tuple.first, cl_descr_tuple[1..-1]]
-      }
+      end
     end
   end

data/lib/fred/opt_parser.rb CHANGED Viewed

@@ -4,7 +4,7 @@
 #require 'optparse' # for reimplementation
 require 'getoptlong'
-require "fred/FredConfigData"
+require "fred/fred_config_data"
 module Fred

data/lib/frprep/frprep.rb CHANGED Viewed

@@ -1,5 +1,5 @@
 require 'frprep/do_parses'
-require 'common/frprep_helper'
+require 'common/prep_helper'
 require 'common/FixSynSemMapping'
 # For FN input.
 require 'frprep/FNCorpusXML'

data/lib/frprep/interfaces/berkeley_interface.rb CHANGED Viewed

@@ -63,12 +63,9 @@ class BerkeleyInterface < SynInterfaceSTXML
     parser = ENV['SHALM_BERKELEY_BIN'] || 'berkeleyParser.jar'
     grammar = ENV['SHALM_BERKELEY_MODEL'] || 'grammar.gr'
+    options = ENV['SHALM_BERKELEY_OPTIONS']
-    #berkeley_prog = "java -Xmx2000m -jar #{@program_path}berkeleyParser.jar -gr #{@program_path}ger_sm5.gr"
-    #berkeley_prog = "java -d64 -Xmx10000m -jar #{@program_path}berkeley-parser.jar -gr #{@program_path}gerNegra.01.utf8 "
-    berkeley_prog = "java -jar #{@program_path}#{parser} -gr #{@program_path}#{grammar}"
+    berkeley_prog = "java -jar #{@program_path}#{parser} #{options} -gr #{@program_path}#{grammar}"
     Dir[in_dir + "*" + @insuffix].each do |inputfilename|
@@ -139,10 +136,10 @@ class BerkeleyInterface < SynInterfaceSTXML
         # - a parse beginning with <( (>, <( (TOP>, <( (VROOT> etc.
         # TOP - Negra Grammars
         # VROOT - Tiger Grammars
-        # PSEUDE - Original BP Grammars
+        # PSEUDO - Original BP Grammars
         # ROOT - some english grammars
         # empty identifiers for older Tiger grammars
-	if line.nil? or line=~/^\( *\((PSEUDO|TOP|ROOT|VROOT)? / or line=~/^\(\(\)/
+	if line.nil? or line=~/^(\( *)?\((PSEUDO|TOP|ROOT|VROOT)? / or line=~/^\(\(\)/
           break
 	end
         sentid +=1
@@ -157,8 +154,9 @@ class BerkeleyInterface < SynInterfaceSTXML
       # Insert a top node <VROOT> if missing.
       # Some grammars trained on older Tiger Versions
       # expose this problem.
-      line.sub!(/^(\(\s+\(\s+)/, '\1VROOT')
+      #STDERR.puts "@@@1 <#{line}>"
+      line.sub!(/^(\(\s+\()(\s+)/, '\1VROOT\2')
+      #STDERR.puts "@@@2 <#{line}>"
       # berkeley parser output: remove brackets /(.*)/
       # Remove leading and trailing top level brackets.
       line.sub!(/^\( */, '')

data/lib/frprep/opt_parser.rb CHANGED Viewed

@@ -3,7 +3,7 @@
 # AB, 2010-11-25
 require 'optparse'
-require 'common/FrPrepConfigData'
+require 'common/prep_config_data'
 require 'common/SynInterfaces'
 module FrPrep

data/lib/rosy/ExternalConfigData.rb CHANGED Viewed

@@ -5,7 +5,7 @@
 # for Fred and Rosy:
 # access to configuration and experiment description file
-require 'common/ConfigData'
+require 'common/config_data'
 ##############################
 # Class ExternalConfigData

data/lib/rosy/RosyEval.rb CHANGED Viewed

@@ -19,7 +19,7 @@ require "rosy/RosyTask"
 require "rosy/RosyPruning"
 # Frprep packages
-require "common/FrPrepConfigData"
+require "common/prep_config_data"
 #######################################################################
 # This class is a subclass of the general evaluation class

data/lib/rosy/RosyFeaturize.rb CHANGED Viewed

@@ -9,13 +9,13 @@ require "common/SynInterfaces"
 require "common/ruby_class_extensions"
 # Frprep packages
-require "common/FrPrepConfigData"
+#require "common/prep_config_data"
 # Rosy packages
 require "rosy/FailedParses"
 require "rosy/FeatureInfo"
 require "rosy/InputData"
-require "rosy/RosyConfigData"
+require "rosy/rosy_config_data"
 require "common/RosyConventions"
 require "rosy/RosySplit"
 require "rosy/RosyTask"
@@ -81,24 +81,25 @@ class RosyFeaturize < RosyTask
     ##
     # add preprocessing information to the experiment file object
-    if @dataset
-      preproc_parameter = "preproc_descr_file_" + @dataset
-    else
-      # split data
-      preproc_parameter = "preproc_descr_file_train"
-    end
-    preproc_expname = @exp.get(preproc_parameter)
-    if not(preproc_expname)
-      $stderr.puts "Please set the name of the preprocessing exp. file name"
-      $stderr.puts "in the experiment file, parameter #{preproc_parameter}"
-      exit 1
-    elsif not(File.readable?(preproc_expname))
-      $stderr.puts "Error in the experiment file:"
-      $stderr.puts "Parameter #{preproc_parameter} has to be a readable file."
-      exit 1
-    end
-    preproc_exp = FrPrepConfigData.new(preproc_expname)
-    @exp.adjoin(preproc_exp)
+    # @note AB: Commented out due to separation of PrepConfigData.
+    # if @dataset
+    #   preproc_parameter = "preproc_descr_file_" + @dataset
+    # else
+    #   # split data
+    #   preproc_parameter = "preproc_descr_file_train"
+    # end
+    # preproc_expname = @exp.get(preproc_parameter)
+    # if not(preproc_expname)
+    #   $stderr.puts "Please set the name of the preprocessing exp. file name"
+    #   $stderr.puts "in the experiment file, parameter #{preproc_parameter}"
+    #   exit 1
+    # elsif not(File.readable?(preproc_expname))
+    #   $stderr.puts "Error in the experiment file:"
+    #   $stderr.puts "Parameter #{preproc_parameter} has to be a readable file."
+    #   exit 1
+    # end
+    # preproc_exp = FrPrepConfigData.new(preproc_expname)
+    # @exp.adjoin(preproc_exp)
     ###
     # find appropriate class for interpreting syntactic structures

data/lib/rosy/RosyInspect.rb CHANGED Viewed

@@ -12,7 +12,7 @@ require "rosy/RosyTrainingTestTable"
 require "rosy/View"
 # Frprep packages
-require "common/FrPrepConfigData"
+require "common/prep_config_data"
 class RosyInspect < RosyTask

data/lib/rosy/RosyPruning.rb CHANGED Viewed

@@ -12,7 +12,7 @@ require "common/ruby_class_extensions"
 require "rosy/RosyFeatureExtractors"
 require "common/RosyConventions"
-require "rosy/RosyConfigData"
+require "rosy/rosy_config_data"
 require "rosy/RosyIterator"
 ###

data/lib/rosy/RosyServices.rb CHANGED Viewed

@@ -16,7 +16,7 @@ require "rosy/RosyTrainingTestTable"
 require "rosy/View"
 # Frprep packages
-require "common/FrPrepConfigData"
+require "common/prep_config_data"
 ###################################################
 class RosyServices < RosyTask

data/lib/rosy/RosySplit.rb CHANGED Viewed

@@ -16,7 +16,7 @@
 require "common/ruby_class_extensions"
 # Frprep packages
-require "common/FrPrepConfigData"
+require "common/prep_config_data"
 # Rosy packages
 require "rosy/FailedParses"

data/lib/rosy/RosyTest.rb CHANGED Viewed

@@ -24,7 +24,7 @@ require "rosy/RosyTrainingTestTable"
 require "rosy/View"
 # Frprep packages
-require "common/FrPrepConfigData" # AB: what the fuck???
+#require "common/prep_config_data" # AB: unclear why this was ever required here
 ##########################################################################
 # classifier combination class
@@ -156,25 +156,28 @@ class RosyTest < RosyTask
       ##
       # add preprocessing information to the experiment file object
-      if @splitID
-        # use split data
-        preproc_param = "preproc_descr_file_train"
-      else
-        # use test data
-        preproc_param = "preproc_descr_file_test"
-      end
-      preproc_expname = @exp.get(preproc_param)
-      if not(preproc_expname)
-        $stderr.puts "Please set the name of the preprocessing exp. file name"
-        $stderr.puts "in the experiment file, parameter #{preproc_param}."
-        exit 1
-      elsif not(File.readable?(preproc_expname))
-        $stderr.puts "Error in the experiment file:"
-        $stderr.puts "Parameter #{preproc_param} has to be a readable file."
-        exit 1
-      end
-      preproc_exp = FrPrepConfigData.new(preproc_expname)
-      @exp.adjoin(preproc_exp)
+      # @note AB: Commented out due to separation of PrepConfigData:
+      #   information for SynInterfaces required.
+      # if @splitID
+      #   # use split data
+      #   preproc_param = "preproc_descr_file_train"
+      # else
+      #   # use test data
+      #   preproc_param = "preproc_descr_file_test"
+      # end
+      # preproc_expname = @exp.get(preproc_param)
+      # if not(preproc_expname)
+      #   $stderr.puts "Please set the name of the preprocessing exp. file name"
+      #   $stderr.puts "in the experiment file, parameter #{preproc_param}."
+      #   exit 1
+      # elsif not(File.readable?(preproc_expname))
+      #   $stderr.puts "Error in the experiment file:"
+      #   $stderr.puts "Parameter #{preproc_param} has to be a readable file."
+      #   exit 1
+      # end
+      # preproc_exp = FrPrepConfigData.new(preproc_expname)
+      # @exp.adjoin(preproc_exp)
       # announce the task
       $stderr.puts "---------"

data/lib/rosy/RosyTrain.rb CHANGED Viewed

@@ -18,7 +18,7 @@ require "rosy/RosyPruning"
 require "common/ML"
 # Frprep packages
-require "common/FrPrepConfigData"
+#require "common/prep_config_data"
 class RosyTrain < RosyTask
@@ -68,18 +68,20 @@ class RosyTrain < RosyTask
     ##
     # add preprocessing information to the experiment file object
-    preproc_expname = @exp.get("preproc_descr_file_train")
-    if not(preproc_expname)
-      $stderr.puts "Please set the name of the preprocessing exp. file name"
-      $stderr.puts "in the experiment file, parameter preproc_descr_file_train."
-      exit 1
-    elsif not(File.readable?(preproc_expname))
-      $stderr.puts "Error in the experiment file:"
-      $stderr.puts "Parameter preproc_descr_file_train has to be a readable file."
-      exit 1
-    end
-    preproc_exp = FrPrepConfigData.new(preproc_expname)
-    @exp.adjoin(preproc_exp)
+    # @note AB: Commented out due to separation of PrepConfigData.
+    #   No information seems to be required.
+    # preproc_expname = @exp.get("preproc_descr_file_train")
+    # if not(preproc_expname)
+    #   $stderr.puts "Please set the name of the preprocessing exp. file name"
+    #   $stderr.puts "in the experiment file, parameter preproc_descr_file_train."
+    #   exit 1
+    # elsif not(File.readable?(preproc_expname))
+    #   $stderr.puts "Error in the experiment file:"
+    #   $stderr.puts "Parameter preproc_descr_file_train has to be a readable file."
+    #   exit 1
+    # end
+    # preproc_exp = FrPrepConfigData.new(preproc_expname)
+    # @exp.adjoin(preproc_exp)
     # get_lf returns: array of pairs [classifier_name, options[array]]