RubyGems - shalmaneser-rosy - Versions diffs - 1.2.0.rc4 - Mend

shalmaneser-rosy 1.2.0.rc4

Files changed (38) hide show

checksums.yaml +7 -0
data/.yardopts +10 -0
data/CHANGELOG.md +4 -0
data/LICENSE.md +4 -0
data/README.md +93 -0
data/bin/rosy +17 -0
data/lib/rosy/AbstractFeatureAndExternal.rb +242 -0
data/lib/rosy/ExternalConfigData.rb +58 -0
data/lib/rosy/FailedParses.rb +130 -0
data/lib/rosy/FeatureInfo.rb +242 -0
data/lib/rosy/GfInduce.rb +1115 -0
data/lib/rosy/GfInduceFeature.rb +148 -0
data/lib/rosy/InputData.rb +294 -0
data/lib/rosy/RosyConfusability.rb +338 -0
data/lib/rosy/RosyEval.rb +465 -0
data/lib/rosy/RosyFeatureExtractors.rb +1609 -0
data/lib/rosy/RosyFeaturize.rb +281 -0
data/lib/rosy/RosyInspect.rb +336 -0
data/lib/rosy/RosyIterator.rb +478 -0
data/lib/rosy/RosyPhase2FeatureExtractors.rb +230 -0
data/lib/rosy/RosyPruning.rb +165 -0
data/lib/rosy/RosyServices.rb +744 -0
data/lib/rosy/RosySplit.rb +232 -0
data/lib/rosy/RosyTask.rb +19 -0
data/lib/rosy/RosyTest.rb +829 -0
data/lib/rosy/RosyTrain.rb +234 -0
data/lib/rosy/RosyTrainingTestTable.rb +787 -0
data/lib/rosy/TargetsMostFrequentFrame.rb +60 -0
data/lib/rosy/View.rb +418 -0
data/lib/rosy/opt_parser.rb +379 -0
data/lib/rosy/rosy.rb +78 -0
data/lib/rosy/rosy_config_data.rb +121 -0
data/test/frprep/test_opt_parser.rb +94 -0
data/test/functional/functional_test_helper.rb +58 -0
data/test/functional/test_fred.rb +47 -0
data/test/functional/test_frprep.rb +99 -0
data/test/functional/test_rosy.rb +40 -0
metadata +105 -0

@@ -0,0 +1,121 @@
+require 'common/config_data'
+##############################
+# Class RosyConfigData
+#
+# inherits from ConfigData,
+# sets features for ROSY
+class RosyConfigData < ConfigData
+  CONFIG_DEFS = { # features
+    "feature" => "list",
+    "classifier" => "list",
+    "verbose" => "bool" ,
+    "enduser_mode" => "bool",
+    "experiment_ID" => "string",
+    "directory_input_train" => "string",
+    "directory_input_test" => "string",
+    "directory_output" => "string",
+    "preproc_descr_file_train" => "string",
+    "preproc_descr_file_test" => "string",
+    "external_descr_file"    => "string",
+    "dbtype" => "string",    # "mysql" or "sqlite"
+    "host" => "string",      # DB access: sqlite only
+    "user" => "string",
+    "passwd" => "string",
+    "dbname" => "string",
+    "data_dir" => "string",  # for external use
+    "rosy_dir" => "pattern", # for internal use only, set by rosy.rb
+    "classifier_dir" => "string", # if present, special directory for classifiers
+    "classif_column_name" => "string",
+    "main_table_name" => "pattern",
+    "test_table_name" => "pattern",
+    "eval_file" => "pattern",
+    "log_file" => "pattern",
+    "failed_file" => "pattern",
+    "classifier_file" => "pattern",
+    "classifier_output_file" => "pattern",
+    "noval" => "string",
+    "split_nones" => "bool",
+    "print_eval_log" => "bool",
+    "assume_argrec_perfect" => "bool",
+    "xwise_argrec" => "string",
+    "xwise_arglab" => "string",
+    "xwise_onestep" => "string",
+    "fe_syn_repair" => "bool", # map words to constituents for FEs: idealize?
+    "fe_rel_repair" => "bool", # FEs: include non-included relative clauses into FEs
+    "prune" => "string", # pruning prior to argrec?
+    # Imported from PrepConfigData
+    'do_postag' => 'bool',
+    'do_lemmatize' => 'bool',
+    'do_parse' => 'bool',
+    'pos_tagger' => 'string',
+    'lemmatizer' => 'string',
+    'parser' => 'string'
+    }
+  def initialize(filename)
+    super(filename, CONFIG_DEFS, ["exp_ID", "test_ID", "split_ID",
+                                  "feature_name", "classif", "step",
+                                  "group", "dataset","mode"])
+    # set access functions for list features
+    set_list_feature_access("feature",
+			    method("access_feature"))
+    # set access functions for list features
+    set_list_feature_access("classifier",
+			    method("access_feature"))
+  end
+  ###
+  # protected
+  #####
+  # access_feature
+  #
+  # access function for feature 'feature'
+  #
+  # assumed format in the config file:
+  #
+  #   feature = path [option]*
+  #
+  # i.e. first the name of the feature type to use, then
+  # optionally options associated with that feature,
+  # e.g. 'argrec': use that feature only when computing argrec
+  #
+  # the access function is called with parameter val_list, an array of
+  # string tuples, one string tuple for each feature defined.
+  # the first string in the tuple is the feature name, the rest are the options
+  #
+  # returns: a list of pairs [feature_name(string), options(array:string)]
+  # of defined features
+  def access_feature(val_list) # array:array:string: list of tuples defined in config file
+		               # for feature 'feature'
+    if val_list.nil?
+      []
+    else
+      val_list.map do |feature_descr_tuple|
+        [feature_descr_tuple.first, feature_descr_tuple[1..-1]]
+      end
+    end
+  end
+end

data/test/frprep/test_opt_parser.rb ADDED

@@ -0,0 +1,94 @@
+# -*- coding: utf-8 -*-
+require 'test/unit'
+require 'stringio' # for helper methods
+require 'frprep/opt_parser'
+include FrPrep
+class TestOptParser < Test::Unit::TestCase
+  def setup
+    @exp_file = 'test/frprep/data/prp_test.salsa'
+    @valid_opts = ['--expfile', @exp_file,
+                   '--help'
+                  ]
+  end
+  def test_public_methods
+    assert_respond_to(OptParser, :parse)
+  end
+  # It should return a FrPrepConfigData object.
+  def test_parse_method
+    input = ['-e', @exp_file]
+    return_value = OptParser.parse(input)
+    assert(return_value.instance_of?(FrPrepConfigData))
+  end
+  # It should reject the empty input and exit.
+  def test_empty_input
+    out, err = intercept_output do
+      assert_raises(SystemExit) { OptParser.parse([]) }
+    end
+    assert_match(/You have to provide some options./, err)
+  end
+  # It should accept correct options.
+  # Invalid options is the matter of OptionParser itself,
+  # do not test it here.
+  # We test only, that OP exits and does not raise an exception.
+  def test_accept_correct_options
+    # this options we should treat separately
+    @valid_opts.delete('--help')
+    assert_nothing_raised { OptParser.parse(@valid_opts) }
+    stdout, stderr = intercept_output do
+      assert_raises(SystemExit) { OptParser.parse(['--invalid-option']) }
+    end
+    assert_match(/You have provided an invalid option:/, stderr)
+  end
+  # It should successfully exit with some options.
+  def test_successful_exit
+    quietly do
+      success_args = ['-h', '--help']
+      success_args.each do |arg|
+        assert_raises(SystemExit) { OptParser.parse(arg.split) }
+      end
+    end
+  end
+end
+################################################################################
+# It is a helper method, many testable units provide some verbose output
+# to stderr and/or stdout. It is usefull to suppress any kind of verbosity.
+def quietly(&b)
+  begin
+    orig_stderr = $stderr.clone
+    orig_stdout = $stdout.clone
+    $stderr.reopen(File.new('/dev/null', 'w'))
+    $stdout.reopen(File.new('/dev/null', 'w'))
+    b.call
+  ensure
+    $stderr.reopen(orig_stderr)
+    $stdout.reopen(orig_stdout)
+  end
+end
+# It is a helper method for handling stdout and stderr as strings.
+def intercept_output
+  orig_stdout = $stdout
+  orig_stderr = $stderr
+  $stdout = StringIO.new
+  $stderr = StringIO.new
+  yield
+  return $stdout.string, $stderr.string
+ensure
+  $stdout = orig_stdout
+  $stderr = orig_stderr
+end

data/test/functional/functional_test_helper.rb ADDED

@@ -0,0 +1,58 @@
+require 'erb'
+# Setting $DEBUG will produce all external output.
+# Otherwise it is suppreced.
+module FunctionalTestHelper
+  PREF = 'test/functional/sample_experiment_files'
+  PRP_TEST_FILE            = "#{PREF}/prp_test.salsa"
+  PRP_TEST_FILE_FRED_STD   = "#{PREF}/prp_test.salsa.fred.standalone"
+  PRP_TEST_FILE_ROSY_STD   = "#{PREF}/prp_test.salsa.rosy.standalone"
+  PRP_TRAIN_FILE           = "#{PREF}/prp_train.salsa"
+  PRP_TRAIN_FILE_FRED_STD  = "#{PREF}/prp_train.salsa.fred.standalone"
+  PRP_TRAIN_FILE_ROSY_STD  = "#{PREF}/prp_train.salsa.rosy.standalone"
+  FRED_TEST_FILE  = 'test/functional/sample_experiment_files/fred_test.salsa'
+  FRED_TRAIN_FILE = 'test/functional/sample_experiment_files/fred_train.salsa'
+  ROSY_TEST_FILE  = 'test/functional/sample_experiment_files/rosy_test.salsa'
+  ROSY_TRAIN_FILE = 'test/functional/sample_experiment_files/rosy_train.salsa'
+  # Testing input for Preprocessor.
+  PRP_PLAININPUT       = "#{PREF}/prp_plaininput"
+  PRP_STXMLINPUT       = "#{PREF}/prp_stxmlinput"
+  PRP_TABINPUT         = "#{PREF}/prp_tabinput"
+  PRP_FNXMLINPUT       = "#{PREF}/prp_fnxmlinput"
+  PRP_FNCORPUSXMLINPUT = "#{PREF}/prp_fncorpusxmlinput"
+  # Testing output for Preprocessor.
+  PRP_STXMLOUTPUT = "#{PREF}/prp_stxmloutput"
+  PRP_TABOUTPUT   = "#{PREF}/prp_taboutput"
+  # Run an external process for functional testing and check the return code.
+  # <system> returns <true> if the external code exposes no errors.
+  # <@msg> is defined for every test object.
+  # @param cmd [String]
+  def execute(cmd)
+    unless $DEBUG
+      cmd = cmd + ' 1>/dev/null 2>&1'
+    end
+    status = system(cmd)
+    assert(status, @msg)
+  end
+  # Create a temporary exp file only for this test.
+  # Shalmaneser needs absolute paths, we provide them in exp files
+  # using templating.
+  def create_exp_file(file)
+    template = File.read("#{file}.erb")
+    text = ERB.new(template).result
+    File.open(file, 'w') do |f|
+      f.write(text)
+    end
+  end
+  def remove_exp_file(file)
+    File.delete(file)
+  end
+end

data/test/functional/test_fred.rb ADDED

@@ -0,0 +1,47 @@
+# -*- encoding: utf-8 -*-
+require 'test/unit'
+require 'functional/functional_test_helper'
+class TestFred < Test::Unit::TestCase
+  include FunctionalTestHelper
+  def setup
+    @msg = "Fred is doing bad, you've just broken something!"
+    @test_file = FRED_TEST_FILE
+    @train_file = FRED_TRAIN_FILE
+  end
+  def test_fred_testing_featurization
+    create_exp_file(@test_file)
+    create_exp_file(PRP_TEST_FILE_FRED_STD)
+    execute("ruby -I lib bin/fred -t featurize -e #{@test_file} -d test")
+    remove_exp_file(@test_file)
+    remove_exp_file(PRP_TEST_FILE_FRED_STD)
+  end
+  def test_fred_testing_tests
+    create_exp_file(@test_file)
+    create_exp_file(PRP_TEST_FILE_FRED_STD)
+    execute("ruby -I lib bin/fred -t test -e #{@test_file}")
+    remove_exp_file(@test_file)
+    remove_exp_file(PRP_TEST_FILE_FRED_STD)
+  end
+  def test_fred_training_featurization
+    create_exp_file(@train_file)
+    create_exp_file(PRP_TRAIN_FILE_FRED_STD)
+    execute("ruby -I lib bin/fred -t featurize -e #{@train_file} -d train")
+    remove_exp_file(@train_file)
+    remove_exp_file(PRP_TRAIN_FILE_FRED_STD)
+  end
+  def test_fred_training_train
+    create_exp_file(@train_file)
+    create_exp_file(PRP_TRAIN_FILE_FRED_STD)
+    execute("ruby -I lib bin/fred -t train -e #{@train_file}")
+    remove_exp_file(@train_file)
+    remove_exp_file(PRP_TRAIN_FILE_FRED_STD)
+  end
+end

data/test/functional/test_frprep.rb ADDED

@@ -0,0 +1,99 @@
+# -*- encoding: utf-8 -*-
+require 'test/unit'
+require 'functional/functional_test_helper'
+#require 'fileutils' # File.delete(), File.rename(), File.symlink()
+class TestFrprep < Test::Unit::TestCase
+  include FunctionalTestHelper
+  def setup
+    @msg        = "FrPrep is doing bad, you've just broken something!"
+    @test_file  = PRP_TEST_FILE
+    @train_file = PRP_TRAIN_FILE
+    @ptb        = 'lib/frprep/interfaces/berkeley_interface.rb'
+    #link_berkeley
+    ENV['SHALM_BERKELEY_MODEL'] = 'sc_dash_labeled_1_smoothing.gr'
+  end
+  def teardown
+    #unlink_berkeley
+  end
+  def test_frprep_testing
+    create_exp_file(@test_file)
+    execute("ruby -I lib bin/frprep -e #{@test_file}")
+    remove_exp_file(@test_file)
+  end
+  def test_frprep_training
+    create_exp_file(@train_file)
+    execute("ruby -I lib bin/frprep -e #{@train_file}")
+    remove_exp_file(@train_file)
+  end
+  # Testing input in different formats.
+  def test_frprep_plaininput
+    create_exp_file(PRP_PLAININPUT)
+    execute("ruby -I lib bin/frprep -e #{PRP_PLAININPUT}")
+    remove_exp_file(PRP_PLAININPUT)
+  end
+  def test_frprep_stxmlinput
+    create_exp_file(PRP_STXMLINPUT)
+    execute("ruby -I lib bin/frprep -e #{PRP_STXMLINPUT}")
+    remove_exp_file(PRP_STXMLINPUT)
+  end
+  def test_frprep_tabinput
+    create_exp_file(PRP_TABINPUT)
+    execute("ruby -I lib bin/frprep -e #{PRP_TABINPUT}")
+    remove_exp_file(PRP_TABINPUT)
+  end
+  def test_frprep_fncorpusxmlinput
+    create_exp_file(PRP_FNCORPUSXMLINPUT)
+    execute("ruby -I lib bin/frprep -e #{PRP_FNCORPUSXMLINPUT}")
+    remove_exp_file(PRP_FNCORPUSXMLINPUT)
+  end
+  def test_frprep_fnxmlinput
+    create_exp_file(PRP_FNXMLINPUT)
+    execute("ruby -I lib bin/frprep -e #{PRP_FNXMLINPUT}")
+    remove_exp_file(PRP_FNXMLINPUT)
+  end
+  # Testing output in different formats.
+  # We test only on German input assuming English input to work.
+  def test_frprep_stxmloutput
+    create_exp_file(PRP_STXMLOUTPUT)
+    execute("ruby -I lib bin/frprep -e #{PRP_STXMLOUTPUT}")
+    remove_exp_file(PRP_STXMLOUTPUT)
+  end
+  def test_frprep_taboutput
+    create_exp_file(PRP_TABOUTPUT)
+    execute("ruby -I lib bin/frprep -e #{PRP_TABOUTPUT}")
+    remove_exp_file(PRP_TABOUTPUT)
+  end
+  private
+  # Berkeley Parser takes a long time which is bad for testing.
+  # We ran it once and reuse the result file in our tests.
+  # Before every test we link the Berkeley interface to a stub
+  # with the BP invocation switched off.
+  def link_berkeley
+    File.rename(@ptb, "#{@ptb}.bak")
+    File.symlink(
+                 File.expand_path('test/functional/berkeley_interface.rb.stub'),
+                 File.expand_path(@ptb)
+                 )
+  end
+  # After testing we bring the right interface back, the program remains intact.
+  def unlink_berkeley
+    File.delete(@ptb)
+    File.rename("#{@ptb}.bak", @ptb)
+  end
+end

data/test/functional/test_rosy.rb ADDED

@@ -0,0 +1,40 @@
+# -*- encoding: utf-8 -*-
+require 'test/unit'
+require 'functional/functional_test_helper'
+class TestRosy < Test::Unit::TestCase
+  include FunctionalTestHelper
+  def setup
+    @msg = "Rosy is doing bad, you've just broken something!"
+  end
+  def test_rosy_testing
+    create_exp_file(ROSY_TEST_FILE)
+    create_exp_file(PRP_TEST_FILE_ROSY_STD)
+    execute("ruby -rubygems -I lib bin/rosy -t featurize -e #{ROSY_TEST_FILE} -d test")
+    execute("ruby -rubygems -I lib bin/rosy -t test -e #{ROSY_TEST_FILE}")
+    remove_exp_file(ROSY_TEST_FILE)
+    remove_exp_file(PRP_TEST_FILE_ROSY_STD)
+  end
+  def test_rosy_training
+    create_exp_file(ROSY_TRAIN_FILE)
+    create_exp_file(PRP_TRAIN_FILE_ROSY_STD)
+    execute("ruby -rubygems -I lib bin/rosy -t featurize -e #{ROSY_TRAIN_FILE} -d train")
+    execute("ruby -rubygems -I lib bin/rosy -t train -e #{ROSY_TRAIN_FILE} -s argrec")
+    execute("ruby -rubygems -I lib bin/rosy -t train -e #{ROSY_TRAIN_FILE} -s arglab")
+    remove_exp_file(ROSY_TRAIN_FILE)
+    remove_exp_file(PRP_TRAIN_FILE_ROSY_STD)
+  end
+  def test_rosy_training_onestep
+    create_exp_file(ROSY_TRAIN_FILE)
+    create_exp_file(PRP_TRAIN_FILE_ROSY_STD)
+    execute("ruby -rubygems -I lib bin/rosy -t featurize -e #{ROSY_TRAIN_FILE} -d train")
+    execute("ruby -rubygems -I lib bin/rosy -t train -e #{ROSY_TRAIN_FILE} -s onestep")
+    remove_exp_file(ROSY_TRAIN_FILE)
+    remove_exp_file(PRP_TRAIN_FILE_ROSY_STD)
+  end
+end