shalmaneser-rosy 1.2.0.rc4

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,121 @@
1
+ require 'common/config_data'
2
+
3
+ ##############################
4
+ # Class RosyConfigData
5
+ #
6
+ # inherits from ConfigData,
7
+ # sets features for ROSY
8
+
9
class RosyConfigData < ConfigData
  # Every configuration key Rosy knows about, mapped to the value type
  # name that is handed to ConfigData ("list", "bool", "string", "pattern").
  CONFIG_DEFS = {
    # features
    "feature" => "list",
    "classifier" => "list",

    "verbose" => "bool",
    "enduser_mode" => "bool",

    "experiment_ID" => "string",

    "directory_input_train" => "string",
    "directory_input_test" => "string",
    "directory_output" => "string",

    "preproc_descr_file_train" => "string",
    "preproc_descr_file_test" => "string",
    "external_descr_file" => "string",

    "dbtype" => "string", # "mysql" or "sqlite"

    # DB access settings. The original note here reads "sqlite only".
    # NOTE(review): confirm which dbtype actually uses these four keys.
    "host" => "string",
    "user" => "string",
    "passwd" => "string",
    "dbname" => "string",

    "data_dir" => "string", # for external use
    "rosy_dir" => "pattern", # for internal use only, set by rosy.rb

    "classifier_dir" => "string", # if present, special directory for classifiers

    "classif_column_name" => "string",
    "main_table_name" => "pattern",
    "test_table_name" => "pattern",

    "eval_file" => "pattern",
    "log_file" => "pattern",
    "failed_file" => "pattern",
    "classifier_file" => "pattern",
    "classifier_output_file" => "pattern",
    "noval" => "string",

    "split_nones" => "bool",
    "print_eval_log" => "bool",
    "assume_argrec_perfect" => "bool",
    "xwise_argrec" => "string",
    "xwise_arglab" => "string",
    "xwise_onestep" => "string",

    "fe_syn_repair" => "bool", # map words to constituents for FEs: idealize?
    "fe_rel_repair" => "bool", # FEs: include non-included relative clauses into FEs
    "prune" => "string", # pruning prior to argrec?

    # Imported from PrepConfigData
    'do_postag' => 'bool',
    'do_lemmatize' => 'bool',
    'do_parse' => 'bool',
    'pos_tagger' => 'string',
    'lemmatizer' => 'string',
    'parser' => 'string'
  }

  # Hands the experiment file, the key definitions and a list of names to
  # ConfigData, then registers the accessor for the two list-valued keys.
  #
  # @param filename [String] path of the experiment file, passed on to ConfigData
  def initialize(filename)
    # presumably the variable names usable inside "pattern" values -
    # TODO confirm against ConfigData
    variable_names = ["exp_ID", "test_ID", "split_ID",
                      "feature_name", "classif", "step",
                      "group", "dataset", "mode"]
    super(filename, CONFIG_DEFS, variable_names)

    # "feature" and "classifier" are both read through access_feature.
    ["feature", "classifier"].each do |list_key|
      set_list_feature_access(list_key, method("access_feature"))
    end
  end

  ###
  # protected

  #####
  # access_feature
  #
  # Access function registered for the list-valued keys.
  #
  # Assumed format in the config file:
  #
  #   feature = path [option]*
  #
  # i.e. the name comes first, followed by any number of options,
  # e.g. 'argrec': use that feature only when computing argrec.
  #
  # @param val_list [Array<Array<String>>, nil] one string tuple per
  #   definition in the config file; the head of a tuple is the name,
  #   the remainder are its options
  # @return [Array<Array>] pairs [name(string), options(array:string)];
  #   an empty array when val_list is nil
  def access_feature(val_list)
    return [] if val_list.nil?

    val_list.map { |tuple| [tuple.first, tuple[1..-1]] }
  end
end
119
+
120
+
121
+
@@ -0,0 +1,94 @@
1
+ # -*- coding: utf-8 -*-
2
+
3
+ require 'test/unit'
4
+ require 'stringio' # for helper methods
5
+ require 'frprep/opt_parser'
6
+
7
+ include FrPrep
8
+
9
# Unit tests for the command line parser OptParser.
class TestOptParser < Test::Unit::TestCase
  # Prepares the experiment file path and a set of valid options.
  def setup
    @exp_file = 'test/frprep/data/prp_test.salsa'
    @valid_opts = ['--expfile', @exp_file,
                   '--help']
  end

  def test_public_methods
    assert_respond_to(OptParser, :parse)
  end

  # It should return a FrPrepConfigData object.
  def test_parse_method
    input = ['-e', @exp_file]
    return_value = OptParser.parse(input)
    # assert_instance_of reports the actual class when the check fails.
    assert_instance_of(FrPrepConfigData, return_value)
  end

  # It should reject the empty input and exit.
  def test_empty_input
    _out, err = intercept_output do
      assert_raises(SystemExit) { OptParser.parse([]) }
    end
    assert_match(/You have to provide some options./, err)
  end

  # It should accept correct options.
  # Invalid options are the matter of OptionParser itself,
  # we do not test them here.
  # We only test that OP exits and does not raise an exception.
  def test_accept_correct_options
    # '--help' makes the parser exit, so it is covered separately in
    # test_successful_exit; build a copy instead of mutating the fixture.
    opts = @valid_opts - ['--help']
    assert_nothing_raised { OptParser.parse(opts) }

    _stdout, stderr = intercept_output do
      assert_raises(SystemExit) { OptParser.parse(['--invalid-option']) }
    end

    assert_match(/You have provided an invalid option:/, stderr)
  end

  # It should successfully exit with some options.
  def test_successful_exit
    quietly do
      ['-h', '--help'].each do |arg|
        assert_raises(SystemExit) { OptParser.parse([arg]) }
      end
    end
  end
end
64
+ ################################################################################
65
+ # It is a helper method, many testable units provide some verbose output
66
+ # to stderr and/or stdout. It is usefull to suppress any kind of verbosity.
67
+ def quietly(&b)
68
+ begin
69
+ orig_stderr = $stderr.clone
70
+ orig_stdout = $stdout.clone
71
+ $stderr.reopen(File.new('/dev/null', 'w'))
72
+ $stdout.reopen(File.new('/dev/null', 'w'))
73
+ b.call
74
+ ensure
75
+ $stderr.reopen(orig_stderr)
76
+ $stdout.reopen(orig_stdout)
77
+ end
78
+ end
79
+
80
# Helper that captures what the given block writes to $stdout and $stderr.
# Both globals point at fresh StringIO objects while the block runs and
# are put back afterwards, whether or not the block raises.
#
# @return [Array<String>] the captured stdout text followed by the
#   captured stderr text
def intercept_output
  saved_out, saved_err = $stdout, $stderr
  $stdout, $stderr = StringIO.new, StringIO.new

  yield

  [$stdout.string, $stderr.string]
ensure
  $stdout, $stderr = saved_out, saved_err
end
94
+
@@ -0,0 +1,58 @@
1
+ require 'erb'
2
+
3
+
4
# Constants and helper methods shared by the functional tests.
# Setting $DEBUG will show all output of the external commands.
# Otherwise it is suppressed.
module FunctionalTestHelper
  # Common prefix of all sample experiment files.
  PREF = 'test/functional/sample_experiment_files'

  # Experiment files for the preprocessor.
  PRP_TEST_FILE = "#{PREF}/prp_test.salsa"
  PRP_TEST_FILE_FRED_STD = "#{PREF}/prp_test.salsa.fred.standalone"
  PRP_TEST_FILE_ROSY_STD = "#{PREF}/prp_test.salsa.rosy.standalone"
  PRP_TRAIN_FILE = "#{PREF}/prp_train.salsa"
  PRP_TRAIN_FILE_FRED_STD = "#{PREF}/prp_train.salsa.fred.standalone"
  PRP_TRAIN_FILE_ROSY_STD = "#{PREF}/prp_train.salsa.rosy.standalone"

  # Experiment files for Fred and Rosy.
  FRED_TEST_FILE = "#{PREF}/fred_test.salsa"
  FRED_TRAIN_FILE = "#{PREF}/fred_train.salsa"
  ROSY_TEST_FILE = "#{PREF}/rosy_test.salsa"
  ROSY_TRAIN_FILE = "#{PREF}/rosy_train.salsa"

  # Testing input for Preprocessor.
  PRP_PLAININPUT = "#{PREF}/prp_plaininput"
  PRP_STXMLINPUT = "#{PREF}/prp_stxmlinput"
  PRP_TABINPUT = "#{PREF}/prp_tabinput"
  PRP_FNXMLINPUT = "#{PREF}/prp_fnxmlinput"
  PRP_FNCORPUSXMLINPUT = "#{PREF}/prp_fncorpusxmlinput"

  # Testing output for Preprocessor.
  PRP_STXMLOUTPUT = "#{PREF}/prp_stxmloutput"
  PRP_TABOUTPUT = "#{PREF}/prp_taboutput"

  # Runs an external command through the shell and asserts on its result.
  # Kernel#system yields true when the command exits successfully; that
  # value is passed to assert together with @msg, which the including
  # test case is expected to set.
  # @param cmd [String] the command line to run
  def execute(cmd)
    # Silence the command unless debugging was requested.
    shell_cmd = $DEBUG ? cmd : cmd + ' 1>/dev/null 2>&1'
    assert(system(shell_cmd), @msg)
  end

  # Creates a temporary exp file only for this test.
  # Shalmaneser needs absolute paths, we provide them in exp files
  # using templating: "<file>.erb" is rendered with ERB and the result
  # is written to <file>.
  # @param file [String] path of the experiment file to generate
  def create_exp_file(file)
    rendered = ERB.new(File.read("#{file}.erb")).result
    File.write(file, rendered)
  end

  # Deletes a generated experiment file.
  # @param file [String] path of the file to delete
  def remove_exp_file(file)
    File.delete(file)
  end
end
@@ -0,0 +1,47 @@
1
+ # -*- encoding: utf-8 -*-
2
+
3
+ require 'test/unit'
4
+ require 'functional/functional_test_helper'
5
+
6
# Functional tests that run bin/fred on the sample experiment files.
class TestFred < Test::Unit::TestCase
  include FunctionalTestHelper

  # Sets the failure message used by execute and the experiment files.
  def setup
    @msg = "Fred is doing bad, you've just broken something!"
    @test_file = FRED_TEST_FILE
    @train_file = FRED_TRAIN_FILE
  end

  def test_fred_testing_featurization
    with_exp_files(@test_file, PRP_TEST_FILE_FRED_STD) do
      execute("ruby -I lib bin/fred -t featurize -e #{@test_file} -d test")
    end
  end

  def test_fred_testing_tests
    with_exp_files(@test_file, PRP_TEST_FILE_FRED_STD) do
      execute("ruby -I lib bin/fred -t test -e #{@test_file}")
    end
  end

  def test_fred_training_featurization
    with_exp_files(@train_file, PRP_TRAIN_FILE_FRED_STD) do
      execute("ruby -I lib bin/fred -t featurize -e #{@train_file} -d train")
    end
  end

  def test_fred_training_train
    with_exp_files(@train_file, PRP_TRAIN_FILE_FRED_STD) do
      execute("ruby -I lib bin/fred -t train -e #{@train_file}")
    end
  end

  private

  # Generates the given experiment files, yields, and removes the files
  # again. The removal sits in an ensure clause so that a failed assertion
  # inside the block does not leave generated files behind. Only files
  # that were actually generated are removed.
  #
  # @param files [Array<String>] experiment files to generate from their templates
  def with_exp_files(*files)
    created = []
    files.each do |file|
      create_exp_file(file)
      created << file
    end
    yield
  ensure
    created.each { |file| remove_exp_file(file) }
  end
end
@@ -0,0 +1,99 @@
1
+ # -*- encoding: utf-8 -*-
2
+
3
+ require 'test/unit'
4
+ require 'functional/functional_test_helper'
5
+ #require 'fileutils' # File.delete(), File.rename(), File.symlink()
6
+
7
# Functional tests that run bin/frprep on the sample experiment files.
class TestFrprep < Test::Unit::TestCase
  include FunctionalTestHelper

  # Sets the failure message used by execute, the experiment files and the
  # Berkeley model name read from the environment.
  def setup
    @msg = "FrPrep is doing bad, you've just broken something!"
    @test_file = PRP_TEST_FILE
    @train_file = PRP_TRAIN_FILE
    @ptb = 'lib/frprep/interfaces/berkeley_interface.rb'
    #link_berkeley
    ENV['SHALM_BERKELEY_MODEL'] = 'sc_dash_labeled_1_smoothing.gr'
  end

  def teardown
    #unlink_berkeley
  end

  def test_frprep_testing
    run_frprep(@test_file)
  end

  def test_frprep_training
    run_frprep(@train_file)
  end

  # Testing input in different formats.
  def test_frprep_plaininput
    run_frprep(PRP_PLAININPUT)
  end

  def test_frprep_stxmlinput
    run_frprep(PRP_STXMLINPUT)
  end

  def test_frprep_tabinput
    run_frprep(PRP_TABINPUT)
  end

  def test_frprep_fncorpusxmlinput
    run_frprep(PRP_FNCORPUSXMLINPUT)
  end

  def test_frprep_fnxmlinput
    run_frprep(PRP_FNXMLINPUT)
  end

  # Testing output in different formats.
  # We test only on German input assuming English input to work.
  def test_frprep_stxmloutput
    run_frprep(PRP_STXMLOUTPUT)
  end

  def test_frprep_taboutput
    run_frprep(PRP_TABOUTPUT)
  end

  private

  # Generates the experiment file, runs bin/frprep on it and removes the
  # file again. The removal sits in an ensure clause so that a failed
  # assertion in execute does not leave the generated file behind.
  #
  # @param exp_file [String] experiment file to generate and run
  def run_frprep(exp_file)
    create_exp_file(exp_file)
    begin
      execute("ruby -I lib bin/frprep -e #{exp_file}")
    ensure
      remove_exp_file(exp_file)
    end
  end

  # Berkeley Parser takes a long time which is bad for testing.
  # We ran it once and reuse the result file in our tests.
  # Before every test we link the Berkeley interface to a stub
  # with the BP invocation switched off.
  # (Currently not called: the call in setup is commented out.)
  def link_berkeley
    File.rename(@ptb, "#{@ptb}.bak")
    File.symlink(
      File.expand_path('test/functional/berkeley_interface.rb.stub'),
      File.expand_path(@ptb)
    )
  end

  # After testing we bring the right interface back, the program remains intact.
  # (Currently not called: the call in teardown is commented out.)
  def unlink_berkeley
    File.delete(@ptb)
    File.rename("#{@ptb}.bak", @ptb)
  end
end
@@ -0,0 +1,40 @@
1
+ # -*- encoding: utf-8 -*-
2
+
3
+ require 'test/unit'
4
+ require 'functional/functional_test_helper'
5
+
6
# Functional tests that run bin/rosy on the sample experiment files.
class TestRosy < Test::Unit::TestCase
  include FunctionalTestHelper

  # Sets the failure message used by execute.
  def setup
    @msg = "Rosy is doing bad, you've just broken something!"
  end

  def test_rosy_testing
    with_exp_files(ROSY_TEST_FILE, PRP_TEST_FILE_ROSY_STD) do
      rosy("-t featurize -e #{ROSY_TEST_FILE} -d test")
      rosy("-t test -e #{ROSY_TEST_FILE}")
    end
  end

  def test_rosy_training
    with_exp_files(ROSY_TRAIN_FILE, PRP_TRAIN_FILE_ROSY_STD) do
      rosy("-t featurize -e #{ROSY_TRAIN_FILE} -d train")
      rosy("-t train -e #{ROSY_TRAIN_FILE} -s argrec")
      rosy("-t train -e #{ROSY_TRAIN_FILE} -s arglab")
    end
  end

  def test_rosy_training_onestep
    with_exp_files(ROSY_TRAIN_FILE, PRP_TRAIN_FILE_ROSY_STD) do
      rosy("-t featurize -e #{ROSY_TRAIN_FILE} -d train")
      rosy("-t train -e #{ROSY_TRAIN_FILE} -s onestep")
    end
  end

  private

  # Runs bin/rosy with the given arguments and asserts that it succeeds.
  # The former "-rubygems" switch is dropped: RubyGems is loaded by default
  # since Ruby 1.9, and the switch fails on Ruby 2.5 and later, where
  # ubygems.rb no longer exists.
  #
  # @param args [String] command line arguments for bin/rosy
  def rosy(args)
    execute("ruby -I lib bin/rosy #{args}")
  end

  # Generates the given experiment files, yields, and removes the files
  # again. The removal sits in an ensure clause so that a failed assertion
  # inside the block does not leave generated files behind. Only files
  # that were actually generated are removed.
  #
  # @param files [Array<String>] experiment files to generate from their templates
  def with_exp_files(*files)
    created = []
    files.each do |file|
      create_exp_file(file)
      created << file
    end
    yield
  ensure
    created.each { |file| remove_exp_file(file) }
  end
end