shalmaneser-rosy 1.2.0.rc4 → 1.2.rc5

Sign up to get free protection for your applications and to get access to all the features.
Files changed (41) hide show
  1. checksums.yaml +4 -4
  2. data/README.md +47 -18
  3. data/bin/rosy +14 -7
  4. data/lib/rosy/FailedParses.rb +22 -20
  5. data/lib/rosy/FeatureInfo.rb +35 -31
  6. data/lib/rosy/GfInduce.rb +132 -130
  7. data/lib/rosy/GfInduceFeature.rb +86 -68
  8. data/lib/rosy/InputData.rb +59 -55
  9. data/lib/rosy/RosyConfusability.rb +47 -40
  10. data/lib/rosy/RosyEval.rb +55 -55
  11. data/lib/rosy/RosyFeatureExtractors.rb +295 -290
  12. data/lib/rosy/RosyFeaturize.rb +54 -67
  13. data/lib/rosy/RosyInspect.rb +52 -50
  14. data/lib/rosy/RosyIterator.rb +73 -67
  15. data/lib/rosy/RosyPhase2FeatureExtractors.rb +48 -48
  16. data/lib/rosy/RosyPruning.rb +39 -31
  17. data/lib/rosy/RosyServices.rb +116 -115
  18. data/lib/rosy/RosySplit.rb +55 -53
  19. data/lib/rosy/RosyTask.rb +7 -3
  20. data/lib/rosy/RosyTest.rb +174 -191
  21. data/lib/rosy/RosyTrain.rb +46 -50
  22. data/lib/rosy/RosyTrainingTestTable.rb +101 -99
  23. data/lib/rosy/TargetsMostFrequentFrame.rb +13 -9
  24. data/lib/rosy/{AbstractFeatureAndExternal.rb → abstract_feature_extractor.rb} +22 -97
  25. data/lib/rosy/abstract_single_feature_extractor.rb +52 -0
  26. data/lib/rosy/external_feature_extractor.rb +35 -0
  27. data/lib/rosy/opt_parser.rb +231 -201
  28. data/lib/rosy/rosy.rb +63 -64
  29. data/lib/rosy/rosy_conventions.rb +66 -0
  30. data/lib/rosy/rosy_error.rb +15 -0
  31. data/lib/rosy/var_var_restriction.rb +16 -0
  32. data/lib/shalmaneser/rosy.rb +1 -0
  33. metadata +26 -19
  34. data/lib/rosy/ExternalConfigData.rb +0 -58
  35. data/lib/rosy/View.rb +0 -418
  36. data/lib/rosy/rosy_config_data.rb +0 -121
  37. data/test/frprep/test_opt_parser.rb +0 -94
  38. data/test/functional/functional_test_helper.rb +0 -58
  39. data/test/functional/test_fred.rb +0 -47
  40. data/test/functional/test_frprep.rb +0 -99
  41. data/test/functional/test_rosy.rb +0 -40
@@ -1,121 +0,0 @@
1
- require 'common/config_data'
2
-
3
- ##############################
4
- # Class RosyConfigData
5
- #
6
- # inherits from ConfigData,
7
- # sets features for ROSY
8
-
9
- class RosyConfigData < ConfigData
10
- CONFIG_DEFS = { # features
11
- "feature" => "list",
12
- "classifier" => "list",
13
-
14
- "verbose" => "bool" ,
15
- "enduser_mode" => "bool",
16
-
17
- "experiment_ID" => "string",
18
-
19
- "directory_input_train" => "string",
20
- "directory_input_test" => "string",
21
- "directory_output" => "string",
22
-
23
- "preproc_descr_file_train" => "string",
24
- "preproc_descr_file_test" => "string",
25
- "external_descr_file" => "string",
26
-
27
- "dbtype" => "string", # "mysql" or "sqlite"
28
-
29
- "host" => "string", # DB access: sqlite only
30
- "user" => "string",
31
- "passwd" => "string",
32
- "dbname" => "string",
33
-
34
- "data_dir" => "string", # for external use
35
- "rosy_dir" => "pattern", # for internal use only, set by rosy.rb
36
-
37
- "classifier_dir" => "string", # if present, special directory for classifiers
38
-
39
- "classif_column_name" => "string",
40
- "main_table_name" => "pattern",
41
- "test_table_name" => "pattern",
42
-
43
- "eval_file" => "pattern",
44
- "log_file" => "pattern",
45
- "failed_file" => "pattern",
46
- "classifier_file" => "pattern",
47
- "classifier_output_file" => "pattern",
48
- "noval" => "string",
49
-
50
-
51
- "split_nones" => "bool",
52
- "print_eval_log" => "bool",
53
- "assume_argrec_perfect" => "bool",
54
- "xwise_argrec" => "string",
55
- "xwise_arglab" => "string",
56
- "xwise_onestep" => "string",
57
-
58
- "fe_syn_repair" => "bool", # map words to constituents for FEs: idealize?
59
- "fe_rel_repair" => "bool", # FEs: include non-included relative clauses into FEs
60
- "prune" => "string", # pruning prior to argrec?
61
-
62
- # Imported from PrepConfigData
63
- 'do_postag' => 'bool',
64
- 'do_lemmatize' => 'bool',
65
- 'do_parse' => 'bool',
66
- 'pos_tagger' => 'string',
67
- 'lemmatizer' => 'string',
68
- 'parser' => 'string'
69
- }
70
-
71
- def initialize(filename)
72
- super(filename, CONFIG_DEFS, ["exp_ID", "test_ID", "split_ID",
73
- "feature_name", "classif", "step",
74
- "group", "dataset","mode"])
75
-
76
- # set access functions for list features
77
- set_list_feature_access("feature",
78
- method("access_feature"))
79
-
80
- # set access functions for list features
81
- set_list_feature_access("classifier",
82
- method("access_feature"))
83
-
84
- end
85
-
86
- ###
87
- # protected
88
-
89
- #####
90
- # access_feature
91
- #
92
- # access function for feature 'feature'
93
- #
94
- # assumed format in the config file:
95
- #
96
- # feature = path [option]*
97
- #
98
- # i.e. first the name of the feature type to use, then
99
- # optionally options associated with that feature,
100
- # e.g. 'argrec': use that feature only when computing argrec
101
- #
102
- # the access function is called with parameter val_list, an array of
103
- # string tuples, one string tuple for each feature defined.
104
- # the first string in the tuple is the feature name, the rest are the options
105
- #
106
- # returns: a list of pairs [feature_name(string), options(array:string)]
107
- # of defined features
108
- def access_feature(val_list) # array:array:string: list of tuples defined in config file
109
- # for feature 'feature'
110
- if val_list.nil?
111
- []
112
- else
113
- val_list.map do |feature_descr_tuple|
114
- [feature_descr_tuple.first, feature_descr_tuple[1..-1]]
115
- end
116
- end
117
- end
118
- end
119
-
120
-
121
-
@@ -1,94 +0,0 @@
1
- # -*- coding: utf-8 -*-
2
-
3
- require 'test/unit'
4
- require 'stringio' # for helper methods
5
- require 'frprep/opt_parser'
6
-
7
- include FrPrep
8
-
9
- class TestOptParser < Test::Unit::TestCase
10
-
11
- def setup
12
- @exp_file = 'test/frprep/data/prp_test.salsa'
13
- @valid_opts = ['--expfile', @exp_file,
14
- '--help'
15
- ]
16
- end
17
-
18
- def test_public_methods
19
- assert_respond_to(OptParser, :parse)
20
- end
21
-
22
- # It should return a FrPrepConfigData object.
23
- def test_parse_method
24
- input = ['-e', @exp_file]
25
- return_value = OptParser.parse(input)
26
- assert(return_value.instance_of?(FrPrepConfigData))
27
- end
28
-
29
- # It should reject the empty input and exit.
30
- def test_empty_input
31
- out, err = intercept_output do
32
- assert_raises(SystemExit) { OptParser.parse([]) }
33
- end
34
- assert_match(/You have to provide some options./, err)
35
- end
36
-
37
- # It should accept correct options.
38
- # Invalid options are a matter for OptionParser itself,
39
- # do not test it here.
40
- # We only test that OP exits and does not raise an exception.
41
- def test_accept_correct_options
42
- # this option should be treated separately
43
- @valid_opts.delete('--help')
44
- assert_nothing_raised { OptParser.parse(@valid_opts) }
45
-
46
- stdout, stderr = intercept_output do
47
- assert_raises(SystemExit) { OptParser.parse(['--invalid-option']) }
48
- end
49
-
50
- assert_match(/You have provided an invalid option:/, stderr)
51
- end
52
-
53
- # It should successfully exit with some options.
54
- def test_successful_exit
55
- quietly do
56
- success_args = ['-h', '--help']
57
- success_args.each do |arg|
58
- assert_raises(SystemExit) { OptParser.parse(arg.split) }
59
- end
60
- end
61
- end
62
-
63
- end
64
- ################################################################################
65
- # It is a helper method, many testable units provide some verbose output
66
- # to stderr and/or stdout. It is useful to suppress any kind of verbosity.
67
- def quietly(&b)
68
- begin
69
- orig_stderr = $stderr.clone
70
- orig_stdout = $stdout.clone
71
- $stderr.reopen(File.new('/dev/null', 'w'))
72
- $stdout.reopen(File.new('/dev/null', 'w'))
73
- b.call
74
- ensure
75
- $stderr.reopen(orig_stderr)
76
- $stdout.reopen(orig_stdout)
77
- end
78
- end
79
-
80
- # It is a helper method for handling stdout and stderr as strings.
81
- def intercept_output
82
- orig_stdout = $stdout
83
- orig_stderr = $stderr
84
- $stdout = StringIO.new
85
- $stderr = StringIO.new
86
-
87
- yield
88
-
89
- return $stdout.string, $stderr.string
90
- ensure
91
- $stdout = orig_stdout
92
- $stderr = orig_stderr
93
- end
94
-
@@ -1,58 +0,0 @@
1
- require 'erb'
2
-
3
-
4
- # Setting $DEBUG will produce all external output.
5
- # Otherwise it is suppressed.
6
- module FunctionalTestHelper
7
- PREF = 'test/functional/sample_experiment_files'
8
-
9
- PRP_TEST_FILE = "#{PREF}/prp_test.salsa"
10
- PRP_TEST_FILE_FRED_STD = "#{PREF}/prp_test.salsa.fred.standalone"
11
- PRP_TEST_FILE_ROSY_STD = "#{PREF}/prp_test.salsa.rosy.standalone"
12
- PRP_TRAIN_FILE = "#{PREF}/prp_train.salsa"
13
- PRP_TRAIN_FILE_FRED_STD = "#{PREF}/prp_train.salsa.fred.standalone"
14
- PRP_TRAIN_FILE_ROSY_STD = "#{PREF}/prp_train.salsa.rosy.standalone"
15
-
16
- FRED_TEST_FILE = 'test/functional/sample_experiment_files/fred_test.salsa'
17
- FRED_TRAIN_FILE = 'test/functional/sample_experiment_files/fred_train.salsa'
18
- ROSY_TEST_FILE = 'test/functional/sample_experiment_files/rosy_test.salsa'
19
- ROSY_TRAIN_FILE = 'test/functional/sample_experiment_files/rosy_train.salsa'
20
-
21
- # Testing input for Preprocessor.
22
- PRP_PLAININPUT = "#{PREF}/prp_plaininput"
23
- PRP_STXMLINPUT = "#{PREF}/prp_stxmlinput"
24
- PRP_TABINPUT = "#{PREF}/prp_tabinput"
25
- PRP_FNXMLINPUT = "#{PREF}/prp_fnxmlinput"
26
- PRP_FNCORPUSXMLINPUT = "#{PREF}/prp_fncorpusxmlinput"
27
-
28
- # Testing output for Preprocessor.
29
- PRP_STXMLOUTPUT = "#{PREF}/prp_stxmloutput"
30
- PRP_TABOUTPUT = "#{PREF}/prp_taboutput"
31
-
32
- # Run an external process for functional testing and check the return code.
33
- # <system> returns <true> if the external code exposes no errors.
34
- # <@msg> is defined for every test object.
35
- # @param cmd [String]
36
- def execute(cmd)
37
- unless $DEBUG
38
- cmd = cmd + ' 1>/dev/null 2>&1'
39
- end
40
- status = system(cmd)
41
- assert(status, @msg)
42
- end
43
-
44
- # Create a temporary exp file only for this test.
45
- # Shalmaneser needs absolute paths, we provide them in exp files
46
- # using templating.
47
- def create_exp_file(file)
48
- template = File.read("#{file}.erb")
49
- text = ERB.new(template).result
50
- File.open(file, 'w') do |f|
51
- f.write(text)
52
- end
53
- end
54
-
55
- def remove_exp_file(file)
56
- File.delete(file)
57
- end
58
- end
@@ -1,47 +0,0 @@
1
- # -*- encoding: utf-8 -*-
2
-
3
- require 'test/unit'
4
- require 'functional/functional_test_helper'
5
-
6
- class TestFred < Test::Unit::TestCase
7
-
8
- include FunctionalTestHelper
9
-
10
- def setup
11
- @msg = "Fred is doing bad, you've just broken something!"
12
- @test_file = FRED_TEST_FILE
13
- @train_file = FRED_TRAIN_FILE
14
- end
15
-
16
- def test_fred_testing_featurization
17
- create_exp_file(@test_file)
18
- create_exp_file(PRP_TEST_FILE_FRED_STD)
19
- execute("ruby -I lib bin/fred -t featurize -e #{@test_file} -d test")
20
- remove_exp_file(@test_file)
21
- remove_exp_file(PRP_TEST_FILE_FRED_STD)
22
- end
23
-
24
- def test_fred_testing_tests
25
- create_exp_file(@test_file)
26
- create_exp_file(PRP_TEST_FILE_FRED_STD)
27
- execute("ruby -I lib bin/fred -t test -e #{@test_file}")
28
- remove_exp_file(@test_file)
29
- remove_exp_file(PRP_TEST_FILE_FRED_STD)
30
- end
31
-
32
- def test_fred_training_featurization
33
- create_exp_file(@train_file)
34
- create_exp_file(PRP_TRAIN_FILE_FRED_STD)
35
- execute("ruby -I lib bin/fred -t featurize -e #{@train_file} -d train")
36
- remove_exp_file(@train_file)
37
- remove_exp_file(PRP_TRAIN_FILE_FRED_STD)
38
- end
39
-
40
- def test_fred_training_train
41
- create_exp_file(@train_file)
42
- create_exp_file(PRP_TRAIN_FILE_FRED_STD)
43
- execute("ruby -I lib bin/fred -t train -e #{@train_file}")
44
- remove_exp_file(@train_file)
45
- remove_exp_file(PRP_TRAIN_FILE_FRED_STD)
46
- end
47
- end
@@ -1,99 +0,0 @@
1
- # -*- encoding: utf-8 -*-
2
-
3
- require 'test/unit'
4
- require 'functional/functional_test_helper'
5
- #require 'fileutils' # File.delete(), File.rename(), File.symlink()
6
-
7
- class TestFrprep < Test::Unit::TestCase
8
-
9
- include FunctionalTestHelper
10
-
11
- def setup
12
- @msg = "FrPrep is doing bad, you've just broken something!"
13
- @test_file = PRP_TEST_FILE
14
- @train_file = PRP_TRAIN_FILE
15
- @ptb = 'lib/frprep/interfaces/berkeley_interface.rb'
16
- #link_berkeley
17
- ENV['SHALM_BERKELEY_MODEL'] = 'sc_dash_labeled_1_smoothing.gr'
18
- end
19
-
20
- def teardown
21
- #unlink_berkeley
22
- end
23
- def test_frprep_testing
24
- create_exp_file(@test_file)
25
- execute("ruby -I lib bin/frprep -e #{@test_file}")
26
- remove_exp_file(@test_file)
27
- end
28
-
29
- def test_frprep_training
30
- create_exp_file(@train_file)
31
- execute("ruby -I lib bin/frprep -e #{@train_file}")
32
- remove_exp_file(@train_file)
33
- end
34
-
35
- # Testing input in different formats.
36
- def test_frprep_plaininput
37
- create_exp_file(PRP_PLAININPUT)
38
- execute("ruby -I lib bin/frprep -e #{PRP_PLAININPUT}")
39
- remove_exp_file(PRP_PLAININPUT)
40
- end
41
-
42
- def test_frprep_stxmlinput
43
- create_exp_file(PRP_STXMLINPUT)
44
- execute("ruby -I lib bin/frprep -e #{PRP_STXMLINPUT}")
45
- remove_exp_file(PRP_STXMLINPUT)
46
- end
47
-
48
- def test_frprep_tabinput
49
- create_exp_file(PRP_TABINPUT)
50
- execute("ruby -I lib bin/frprep -e #{PRP_TABINPUT}")
51
- remove_exp_file(PRP_TABINPUT)
52
- end
53
-
54
- def test_frprep_fncorpusxmlinput
55
- create_exp_file(PRP_FNCORPUSXMLINPUT)
56
- execute("ruby -I lib bin/frprep -e #{PRP_FNCORPUSXMLINPUT}")
57
- remove_exp_file(PRP_FNCORPUSXMLINPUT)
58
- end
59
-
60
- def test_frprep_fnxmlinput
61
- create_exp_file(PRP_FNXMLINPUT)
62
- execute("ruby -I lib bin/frprep -e #{PRP_FNXMLINPUT}")
63
- remove_exp_file(PRP_FNXMLINPUT)
64
- end
65
-
66
- # Testing output in different formats.
67
- # We test only on German input assuming English input to work.
68
- def test_frprep_stxmloutput
69
- create_exp_file(PRP_STXMLOUTPUT)
70
- execute("ruby -I lib bin/frprep -e #{PRP_STXMLOUTPUT}")
71
- remove_exp_file(PRP_STXMLOUTPUT)
72
- end
73
-
74
- def test_frprep_taboutput
75
- create_exp_file(PRP_TABOUTPUT)
76
- execute("ruby -I lib bin/frprep -e #{PRP_TABOUTPUT}")
77
- remove_exp_file(PRP_TABOUTPUT)
78
- end
79
-
80
-
81
- private
82
- # Berkeley Parser takes a long time which is bad for testing.
83
- # We ran it once and reuse the result file in our tests.
84
- # Before every test we link the Berkeley interface to a stub
85
- # with the BP invocation switched off.
86
- def link_berkeley
87
- File.rename(@ptb, "#{@ptb}.bak")
88
- File.symlink(
89
- File.expand_path('test/functional/berkeley_interface.rb.stub'),
90
- File.expand_path(@ptb)
91
- )
92
- end
93
-
94
- # After testing we bring the right interface back, the program remains intact.
95
- def unlink_berkeley
96
- File.delete(@ptb)
97
- File.rename("#{@ptb}.bak", @ptb)
98
- end
99
- end
@@ -1,40 +0,0 @@
1
- # -*- encoding: utf-8 -*-
2
-
3
- require 'test/unit'
4
- require 'functional/functional_test_helper'
5
-
6
- class TestRosy < Test::Unit::TestCase
7
- include FunctionalTestHelper
8
-
9
- def setup
10
- @msg = "Rosy is doing bad, you've just broken something!"
11
- end
12
-
13
- def test_rosy_testing
14
- create_exp_file(ROSY_TEST_FILE)
15
- create_exp_file(PRP_TEST_FILE_ROSY_STD)
16
- execute("ruby -rubygems -I lib bin/rosy -t featurize -e #{ROSY_TEST_FILE} -d test")
17
- execute("ruby -rubygems -I lib bin/rosy -t test -e #{ROSY_TEST_FILE}")
18
- remove_exp_file(ROSY_TEST_FILE)
19
- remove_exp_file(PRP_TEST_FILE_ROSY_STD)
20
- end
21
-
22
- def test_rosy_training
23
- create_exp_file(ROSY_TRAIN_FILE)
24
- create_exp_file(PRP_TRAIN_FILE_ROSY_STD)
25
- execute("ruby -rubygems -I lib bin/rosy -t featurize -e #{ROSY_TRAIN_FILE} -d train")
26
- execute("ruby -rubygems -I lib bin/rosy -t train -e #{ROSY_TRAIN_FILE} -s argrec")
27
- execute("ruby -rubygems -I lib bin/rosy -t train -e #{ROSY_TRAIN_FILE} -s arglab")
28
- remove_exp_file(ROSY_TRAIN_FILE)
29
- remove_exp_file(PRP_TRAIN_FILE_ROSY_STD)
30
- end
31
-
32
- def test_rosy_training_onestep
33
- create_exp_file(ROSY_TRAIN_FILE)
34
- create_exp_file(PRP_TRAIN_FILE_ROSY_STD)
35
- execute("ruby -rubygems -I lib bin/rosy -t featurize -e #{ROSY_TRAIN_FILE} -d train")
36
- execute("ruby -rubygems -I lib bin/rosy -t train -e #{ROSY_TRAIN_FILE} -s onestep")
37
- remove_exp_file(ROSY_TRAIN_FILE)
38
- remove_exp_file(PRP_TRAIN_FILE_ROSY_STD)
39
- end
40
- end