RubyGems - shalmaneser - Versions diffs - 1.2.0.rc1 → 1.2.0.rc2 - Mend

shalmaneser 1.2.0.rc1 → 1.2.0.rc2

Files changed (30) hide show

checksums.yaml +4 -4
data/README.md +26 -8
data/doc/SB_README +57 -0
data/doc/exp_files_description.txt +160 -0
data/doc/fred.pdf +0 -0
data/doc/index.md +120 -0
data/doc/salsa_tool.pdf +0 -0
data/doc/salsatigerxml.pdf +0 -0
data/doc/shal_doc.pdf +0 -0
data/doc/shal_lrec.pdf +0 -0
data/lib/ext/maxent/Classify.class +0 -0
data/lib/ext/maxent/Train.class +0 -0
data/lib/frprep/TreetaggerInterface.rb +4 -4
data/lib/shalmaneser/version.rb +1 -1
metadata +41 -48
data/test/frprep/test_opt_parser.rb +0 -94
data/test/functional/functional_test_helper.rb +0 -40
data/test/functional/sample_experiment_files/fred_test.salsa.erb +0 -122
data/test/functional/sample_experiment_files/fred_train.salsa.erb +0 -135
data/test/functional/sample_experiment_files/prp_test.salsa.erb +0 -138
data/test/functional/sample_experiment_files/prp_test.salsa.fred.standalone.erb +0 -120
data/test/functional/sample_experiment_files/prp_test.salsa.rosy.standalone.erb +0 -120
data/test/functional/sample_experiment_files/prp_train.salsa.erb +0 -138
data/test/functional/sample_experiment_files/prp_train.salsa.fred.standalone.erb +0 -138
data/test/functional/sample_experiment_files/prp_train.salsa.rosy.standalone.erb +0 -138
data/test/functional/sample_experiment_files/rosy_test.salsa.erb +0 -257
data/test/functional/sample_experiment_files/rosy_train.salsa.erb +0 -259
data/test/functional/test_fred.rb +0 -47
data/test/functional/test_frprep.rb +0 -52
data/test/functional/test_rosy.rb +0 -40

data/test/functional/sample_experiment_files/rosy_train.salsa.erb DELETED Viewed

@@ -1,259 +0,0 @@
-#################################################
-# This is a sample experiment file
-# with explanations of all features
-# that can be set for the ROSY system.
-#
-# To start your own experiment,
-# replace all occurrences of
-# %SOMETHING% or %PATH% or %PARAMETERS%
-# by values of your choice.
-#
-# Experiment file lines that start with '#'
-# are comments and are ignored. Empty lines are ignored as well.
-########################
-# Experiment description
-#
-##
-# Experiment ID:
-# Uniquely identifies files and database tables
-# of this experiment.
-# The experiment ID is a word (no spaces) of
-# letters in [A-Za-z_].
-experiment_ID = rosy_train
-# Enduser mode?
-# The idea is that the enduser will only _apply_
-# pre-trained classifiers. So in enduser mode many
-# options are disallowed.
-enduser_mode = false
-# directories
-# - data directory: where Rosy puts its internal data
-# - input directory:
-#   where Rosy reads its input SalsaTigerXML data.
-#   One directory each for the training and the test data
-# - output directory:
-#   where Rosy writes its output SalsaTigerXML data:
-#   same frames as in the input data, but frame elements newly
-#   assigned.
-#   If no output directory is given, output is to
-#   <data_dir>/<experiment_ID>/output/
-# - classifier_dir: If present, this is where trained classifiers
-#   are written.
-#   Otherwise they are written to <data_dir>/<experiment_ID>/classif_dir
-data_dir = <%= File.expand_path('test/functional/output') %>
-directory_input_train = <%= File.expand_path('test/functional/input/rosy/train.salsa') %>
-#directory_input_test = <%= File.expand_path('test/functional/output/exp_fred_salsa/output/stxml') %>
-#directory_output = <%= File.expand_path('test/functional/output/exp_rosy_salsa/output') %>
-##
-# Preprocessing settings:
-# frprep experiment files for training and test data.
-preproc_descr_file_train = <%= File.expand_path('test/functional/sample_experiment_files/prp_train.salsa.rosy.standalone') %>
-#preproc_descr_file_test = <%= File.expand_path('test/functional/sample_experiment_files/prp_test.salsa') %>
-########################
-# features
-#
-# Please specify all features that you would like
-# Rosy to compute.
-# Note: The system distinguishes between features to be
-#   computed and features to be included in the model,
-#   so you can compute features once and then vary features
-#   included in the model.
-#
-# Format for each feature specification:
-#  feature = <feature_name> [dontuse | argrec | arglab | onestep]
-#
-# dontuse: the feature is computed but not included in the model.
-# argrec, arglab, onestep: the feature is used only in this
-#          processing step
-#
-#
-# The set of features computed must stay the same throughout
-# an experiment (or the match of experiment file and
-# database table will fail), but the set of features included
-# in the model can be varied.
-#
-# See below for a list of all features currently available in the system.
-feature = pt_path
-feature = gf_path
-feature = path
-feature = path_length
-feature = pt_combined_path
-feature = gf_combined_path
-feature = combined_path
-feature = pt_partial_path
-feature = gf_partial_path
-feature = partial_path
-feature = pt_gvpath
-feature = gf_gvpath
-feature = gvpath
-feature = ancestor_rule
-feature = relpos
-feature = pt
-feature = gf
-feature = father_pt
-feature = frame
-feature = target
-feature = target_pos
-feature = target_voice
-feature = gov_verb
-feature = prep
-feature = const_head
-feature = const_head_pos
-feature = icont_word
-feature = firstword
-feature = lastword
-feature = leftsib
-feature = rightsib
-feature = worddistance
-feature = ismaxproj
-feature = nearest_node
-feature = prune
-########################
-# classifiers
-#
-# Please specify each classifier type you want to use.
-# If you specify more than one classifier, classifier combination
-# is used.
-#
-# Format for each classifier specification:
-#   classifier = <classifier_name> <path> [<parameters>]
-#
-# Possible values for <classifier_name> at the moment:
-#   timbl (memory-based learning),
-#   maxent (OpenNLP maxent system)
-#
-# Samples:
-# classifier = timbl /prog/MachineLearning/Timbl5/
-# classifier = maxent /prog/maxent-2.4.0 /prog/shalmaneser/program/tools/maxent
-classifier = maxent <%= File.expand_path('tools/maxent/maxent-2.4.0') %>
-########################
-# further settings
-# Pruning: Identify constituents that are very unlikely
-# to instantiate a semantic role, and prune them prior
-# to the training/application of classifiers?
-#
-# Pruning methods available at the moment:
-#   prune: Xue/Palmer EMNLP 2004, adapted to fit each individual parser
-#
-# To enable pruning, set "prune" to the pruning method of your choice,
-# and also compute the feature of the same name -- see
-# feature list above.
-# To disable pruning, comment out the next line.
-prune = prune
-# verbose mode
-verbose = true
-# data adaptation:
-# correct training labels to
-# match syntax better?
-fe_syn_repair = true
-fe_rel_repair = false
-# xwise: For each classification step (argrec, arglab, onestep)
-# you can set the granularity of training:
-# - by frame (frame)
-# - by target part of speech (target_pos), or
-# - by target lemma (target).
-#
-# these three settings can be combined, e.g.
-#   xwise_argrec = target_pos frame
-# to train argrec frame-wise and split each frame by target POS.
-#
-# If no value is given for xwise_<step>, the default is "frame".
-xwise_argrec = frame
-xwise_arglab = frame
-xwise_onestep = frame
-# assume_argrec_perfect: by default, this is false.
-#
-#   Set this to true
-#   to perform the arglab (argument labeling) step
-#   on all instances that actually are FEs
-#   rather than on all instances that the argrec step
-#   has judged to be FEs.
-assume_argrec_perfect = false
-# split_nones: set to true
-#   to split the NONE target class into:
-#     NONE left of target,
-#     NONE right of target
-#   because the NONE class has so many more instances
-#   than any other.
-split_nones = true
-# print_eval_log: set to true to print individual correctness
-# judgments for each instance evaluated
-print_eval_log = true
-# External data source:
-#
-# Rosy can integrate data computed by additional systems
-# provided that they all use a common experiment file
-# for external data to determine where they put their data.
-# Rosy needs the path to that experiment file.
-#
-# (May be left unset when no external data is used)
-#external_descr_file = %PATH%
-########################
-# rosy internal data - please don't change
-# Database access:
-# dbtype: type of database, either mysql
-#   for a MySQL server, or sqlite for SQLite.
-#
-# if dbtype == mysql, set access parameters:
-#   host: database server
-#   user: user name to use
-#   passwd: password for user
-#   dbname: database where all Rosy's tables will be stored
-dbtype = mysql
-host = localhost
-user = shalm
-passwd = 12345
-dbname = shalm11
-# classifier output columns in the tables all start
-# with this prefix
-classif_column_name = classif
-# pattern for constructing the names
-# of the DB tables with training data (main_table_name)
-# and test data (test_table_name)
-main_table_name = rosy_<exp_ID>_main
-test_table_name = rosy_<exp_ID>_<test_ID>
-# string to use for "no value for this feature"
-# as well as "no FE for this instance"
-noval = NONE
-# pattern for constructing the names
-# of classifier files and classifier output files
-classifier_file = classif.<classif>.<group>
-classifier_output_file = classout.<classif>.<group>.<dataset>
-# pattern for constructing the names
-# of the evaluation file and the evaluation log file
-eval_file = eval.<exp_ID>.<step>.<test_ID>
-log_file = eval_log.<exp_ID>.<step>.<test_ID>
-# pattern for constructing the names
-# of the files with failed parses
-failed_file = parsefail.<exp_ID>.<split_ID>.<dataset>

data/test/functional/test_fred.rb DELETED Viewed

@@ -1,47 +0,0 @@
-# -*- encoding: utf-8 -*-
-require 'test/unit'
-require 'functional/functional_test_helper'
-class TestFred < Test::Unit::TestCase
-  include FunctionalTestHelper
-  def setup
-    @msg = "Fred is doing bad, you've just broken something!"
-    @test_file = FRED_TEST_FILE
-    @train_file = FRED_TRAIN_FILE
-  end
-  def test_fred_testing_featurization
-    create_exp_file(@test_file)
-    create_exp_file(PRP_TEST_FILE_FRED_STD)
-    execute("ruby -I lib bin/fred -t featurize -e #{@test_file} -d test")
-    remove_exp_file(@test_file)
-    remove_exp_file(PRP_TEST_FILE_FRED_STD)
-  end
-  def test_fred_testing_tests
-    create_exp_file(@test_file)
-    create_exp_file(PRP_TEST_FILE_FRED_STD)
-    execute("ruby -I lib bin/fred -t test -e #{@test_file}")
-    remove_exp_file(@test_file)
-    remove_exp_file(PRP_TEST_FILE_FRED_STD)
-  end
-  def test_fred_training_featurization
-    create_exp_file(@train_file)
-    create_exp_file(PRP_TRAIN_FILE_FRED_STD)
-    execute("ruby -I lib bin/fred -t featurize -e #{@train_file} -d train")
-    remove_exp_file(@train_file)
-    remove_exp_file(PRP_TRAIN_FILE_FRED_STD)
-  end
-  def test_fred_training_train
-    create_exp_file(@train_file)
-    create_exp_file(PRP_TRAIN_FILE_FRED_STD)
-    execute("ruby -I lib bin/fred -t train -e #{@train_file}")
-    remove_exp_file(@train_file)
-    remove_exp_file(PRP_TRAIN_FILE_FRED_STD)
-  end
-end

data/test/functional/test_frprep.rb DELETED Viewed

@@ -1,52 +0,0 @@
-# -*- encoding: utf-8 -*-
-require 'test/unit'
-require 'functional/functional_test_helper'
-#require 'fileutils' # File.delete(), File.rename(), File.symlink()
-class TestFrprep < Test::Unit::TestCase
-  include FunctionalTestHelper
-  def setup
-    @msg        = "FrPrep is doing bad, you've just broken something!"
-    @test_file  = PRP_TEST_FILE
-    @train_file = PRP_TRAIN_FILE
-    @ptb        = 'lib/frprep/interfaces/berkeley_interface.rb'
-    link_berkeley
-  end
-  def teardown
-    unlink_berkeley
-  end
-  def test_frprep_testing
-    create_exp_file(@test_file)
-    execute("ruby -I lib bin/frprep -e #{@test_file}")
-    remove_exp_file(@test_file)
-  end
-  def test_frprep_training
-    create_exp_file(@train_file)
-    execute("ruby -I lib bin/frprep -e #{@train_file}")
-    remove_exp_file(@train_file)
-  end
-  private
-  # Berkeley Parser takes a long time, which is bad for testing.
-  # We ran it once and reuse the result file in our tests.
-  # Before every test we link the Berkeley interface to a stub
-  # with the BP invocation switched off.
-  def link_berkeley
-    File.rename(@ptb, "#{@ptb}.bak")
-    File.symlink(
-                 File.expand_path('test/functional/berkeley_interface.rb.stub'),
-                 File.expand_path(@ptb)
-                 )
-  end
-  # After testing we bring the right interface back, so the program remains intact.
-  def unlink_berkeley
-    File.delete(@ptb)
-    File.rename("#{@ptb}.bak", @ptb)
-  end
-end

data/test/functional/test_rosy.rb DELETED Viewed

@@ -1,40 +0,0 @@
-# -*- encoding: utf-8 -*-
-require 'test/unit'
-require 'functional/functional_test_helper'
-class TestRosy < Test::Unit::TestCase
-  include FunctionalTestHelper
-  def setup
-    @msg = "Rosy is doing bad, you've just broken something!"
-  end
-  def test_rosy_testing
-    create_exp_file(ROSY_TEST_FILE)
-    create_exp_file(PRP_TEST_FILE_ROSY_STD)
-    execute("ruby -rubygems -I lib bin/rosy -t featurize -e #{ROSY_TEST_FILE} -d test")
-    execute("ruby -rubygems -I lib bin/rosy -t test -e #{ROSY_TEST_FILE}")
-    remove_exp_file(ROSY_TEST_FILE)
-    remove_exp_file(PRP_TEST_FILE_ROSY_STD)
-  end
-  def test_rosy_training
-    create_exp_file(ROSY_TRAIN_FILE)
-    create_exp_file(PRP_TRAIN_FILE_ROSY_STD)
-    execute("ruby -rubygems -I lib bin/rosy -t featurize -e #{ROSY_TRAIN_FILE} -d train")
-    execute("ruby -rubygems -I lib bin/rosy -t train -e #{ROSY_TRAIN_FILE} -s argrec")
-    execute("ruby -rubygems -I lib bin/rosy -t train -e #{ROSY_TRAIN_FILE} -s arglab")
-    remove_exp_file(ROSY_TRAIN_FILE)
-    remove_exp_file(PRP_TRAIN_FILE_ROSY_STD)
-  end
-  def test_rosy_training_onestep
-    create_exp_file(ROSY_TRAIN_FILE)
-    create_exp_file(PRP_TRAIN_FILE_ROSY_STD)
-    execute("ruby -rubygems -I lib bin/rosy -t featurize -e #{ROSY_TRAIN_FILE} -d train")
-    execute("ruby -rubygems -I lib bin/rosy -t train -e #{ROSY_TRAIN_FILE} -s onestep")
-    remove_exp_file(ROSY_TRAIN_FILE)
-    remove_exp_file(PRP_TRAIN_FILE_ROSY_STD)
-  end
-end