shalmaneser 1.2.0.rc1 → 1.2.0.rc2

Sign up to get free protection for your applications and to get access to all the features.
Files changed (30) hide show
  1. checksums.yaml +4 -4
  2. data/README.md +26 -8
  3. data/doc/SB_README +57 -0
  4. data/doc/exp_files_description.txt +160 -0
  5. data/doc/fred.pdf +0 -0
  6. data/doc/index.md +120 -0
  7. data/doc/salsa_tool.pdf +0 -0
  8. data/doc/salsatigerxml.pdf +0 -0
  9. data/doc/shal_doc.pdf +0 -0
  10. data/doc/shal_lrec.pdf +0 -0
  11. data/lib/ext/maxent/Classify.class +0 -0
  12. data/lib/ext/maxent/Train.class +0 -0
  13. data/lib/frprep/TreetaggerInterface.rb +4 -4
  14. data/lib/shalmaneser/version.rb +1 -1
  15. metadata +41 -48
  16. data/test/frprep/test_opt_parser.rb +0 -94
  17. data/test/functional/functional_test_helper.rb +0 -40
  18. data/test/functional/sample_experiment_files/fred_test.salsa.erb +0 -122
  19. data/test/functional/sample_experiment_files/fred_train.salsa.erb +0 -135
  20. data/test/functional/sample_experiment_files/prp_test.salsa.erb +0 -138
  21. data/test/functional/sample_experiment_files/prp_test.salsa.fred.standalone.erb +0 -120
  22. data/test/functional/sample_experiment_files/prp_test.salsa.rosy.standalone.erb +0 -120
  23. data/test/functional/sample_experiment_files/prp_train.salsa.erb +0 -138
  24. data/test/functional/sample_experiment_files/prp_train.salsa.fred.standalone.erb +0 -138
  25. data/test/functional/sample_experiment_files/prp_train.salsa.rosy.standalone.erb +0 -138
  26. data/test/functional/sample_experiment_files/rosy_test.salsa.erb +0 -257
  27. data/test/functional/sample_experiment_files/rosy_train.salsa.erb +0 -259
  28. data/test/functional/test_fred.rb +0 -47
  29. data/test/functional/test_frprep.rb +0 -52
  30. data/test/functional/test_rosy.rb +0 -40
@@ -1,259 +0,0 @@
1
- #################################################
2
- # This is a sample experiment file
3
- # with explanations of all features
4
- # that can be set for the ROSY system.
5
- #
6
- # To start your own experiment,
7
- # replace all occurrences of
8
- # %SOMETHING% or %PATH% or %PARAMETERS%
9
- # by values of your choice.
10
- #
11
- # Experiment file lines that start with '#'
12
- # are comments and are ignored. Empty lines are ignored as well.
13
-
14
- ########################
15
- # Experiment description
16
- #
17
-
18
- ##
19
- # Experiment ID:
20
- # Uniquely identifies files and database tables
21
- # of this experiment.
22
- # The experiment ID is a word (no spaces) of
23
- # letters in [A-Za-z_].
24
- experiment_ID = rosy_train
25
-
26
- # Enduser mode?
27
- # The idea is that the enduser will only _apply_
28
- # pre-trained classifiers. So in enduser mode many
29
- # options are disallowed.
30
- enduser_mode = false
31
-
32
- # directories
33
- # - data directory: where Rosy puts its internal data
34
- # - input directory:
35
- # where Rosy reads its input SalsaTigerXML data.
36
- # One directory each for the training and the test data
37
- # - output directory:
38
- # where Rosy writes its output SalsaTigerXML data:
39
- # same frames as in the input data, but frame elements newly
40
- # assigned.
41
- # If no output directory is given, output is to
42
- # <data_dir>/<experiment_ID>/output/
43
- # - classifier_dir: If present, this is where trained classifiers
44
- # are written.
45
- # Otherwise they are written to <data_dir>/<experiment_ID>/classif_dir
46
- data_dir = <%= File.expand_path('test/functional/output') %>
47
- directory_input_train = <%= File.expand_path('test/functional/input/rosy/train.salsa') %>
48
- #directory_input_test = <%= File.expand_path('test/functional/output/exp_fred_salsa/output/stxml') %>
49
- #directory_output = <%= File.expand_path('test/functional/output/exp_rosy_salsa/output') %>
50
-
51
-
52
- ##
53
- # Preprocessing settings:
54
- # frprep experiment files for training and test data.
55
- preproc_descr_file_train = <%= File.expand_path('test/functional/sample_experiment_files/prp_train.salsa.rosy.standalone') %>
56
- #preproc_descr_file_test = <%= File.expand_path('test/functional/sample_experiment_files/prp_test.salsa') %>
57
-
58
-
59
- ########################
60
- # features
61
- #
62
- # Please specify all features that you would like
63
- # Rosy to compute.
64
- # Note: The system distinguishes between features to be
65
- # computed and features to be included in the model,
66
- # so you can compute features once and then vary features
67
- # included in the model.
68
- #
69
- # Format for each feature specification:
70
- # feature = <feature_name> [dontuse | argrec | arglab | onestep]
71
- #
72
- # dontuse: the feature is computed but not included in the model.
73
- # argrec, arglab, onestep: the feature is used only in this
74
- # processing step
75
- #
76
- #
77
- # The set of features computed must stay the same throughout
78
- # an experiment (or the match of experiment file and
79
- # database table will fail), but the set of features included
80
- # in the model can be varied.
81
- #
82
- # See below for a list of all features currently available in the system.
83
-
84
- feature = pt_path
85
- feature = gf_path
86
- feature = path
87
- feature = path_length
88
- feature = pt_combined_path
89
- feature = gf_combined_path
90
- feature = combined_path
91
- feature = pt_partial_path
92
- feature = gf_partial_path
93
- feature = partial_path
94
- feature = pt_gvpath
95
- feature = gf_gvpath
96
- feature = gvpath
97
- feature = ancestor_rule
98
- feature = relpos
99
- feature = pt
100
- feature = gf
101
- feature = father_pt
102
- feature = frame
103
- feature = target
104
- feature = target_pos
105
- feature = target_voice
106
- feature = gov_verb
107
- feature = prep
108
- feature = const_head
109
- feature = const_head_pos
110
- feature = icont_word
111
- feature = firstword
112
- feature = lastword
113
- feature = leftsib
114
- feature = rightsib
115
- feature = worddistance
116
- feature = ismaxproj
117
- feature = nearest_node
118
- feature = prune
119
-
120
- ########################
121
- # classifiers
122
- #
123
- # Please specify each classifier type you want to use.
124
- # If you specify more than one classifier, classifier combination
125
- # is used.
126
- #
127
- # Format for each classifier specification:
128
- # classifier = <classifier_name> <path> [<parameters>]
129
- #
130
- # Possible values for <classifier_name> at the moment:
131
- # timbl (memory-based learning),
132
- # maxent (OpenNLP maxent system)
133
- #
134
- # Samples:
135
- # classifier = timbl /prog/MachineLearning/Timbl5/
136
- # classifier = maxent /prog/maxent-2.4.0 /prog/shalmaneser/program/tools/maxent
137
-
138
- classifier = maxent <%= File.expand_path('tools/maxent/maxent-2.4.0') %>
139
-
140
- ########################
141
- # further settings
142
-
143
- # Pruning: Identify constituents that are very unlikely
144
- # to instantiate a semantic role, and prune them prior
145
- # to the training/application of classifiers?
146
- #
147
- # Pruning methods available at the moment:
148
- # prune: Xue/Palmer EMNLP 2004, adapted to fit each individual parser
149
- #
150
- # To enable pruning, set "prune" to the pruning method of your choice,
151
- # and also compute the feature of the same name -- see
152
- # feature list above.
153
- # To disable pruning, comment out the next line.
154
- prune = prune
155
-
156
- # verbose mode
157
- verbose = true
158
-
159
- # data adaptation:
160
- # correct training labels to
161
- # match syntax better?
162
- fe_syn_repair = true
163
- fe_rel_repair = false
164
-
165
- # xwise: For each classification step (argrec, arglab, onestep)
166
- # you can set the granularity of training:
167
- # - by frame (frame)
168
- # - by target part of speech (target_pos), or
169
- # - by target lemma (target).
170
- #
171
- # these three settings can be combined, e.g.
172
- # xwise_argrec = target_pos frame
173
- # to train argrec frame-wise and split each frame by target POS.
174
- #
175
- # If no value is given for xwise_<step>, the default is "frame".
176
- xwise_argrec = frame
177
- xwise_arglab = frame
178
- xwise_onestep = frame
179
-
180
-
181
- # assume_argrec_perfect: by default, this is false.
182
- #
183
- # Set this to true
184
- # to perform the arglab (argument labeling) step
185
- # on all instances that actually are FEs
186
- # rather than on all instances that the argrec step
187
- # has judged to be FEs.
188
- assume_argrec_perfect = false
189
-
190
- # split_nones: set to true
191
- # to split the NONE target class into:
192
- # NONE left of target,
193
- # NONE right of target
194
- # because the NONE class has so many more instances
195
- # than any other.
196
- split_nones = true
197
-
198
-
199
- # print_eval_log: set to true to print individual correctness
200
- # judgments for each instance evaluated
201
- print_eval_log = true
202
-
203
- # External data source:
204
- #
205
- # Rosy can integrate data computed by additional systems
206
- # provided that they all use a common experiment file
207
- # for external data to determine where they put their data.
208
- # Rosy needs the path to that experiment file.
209
- #
210
- # (May be left unset when no external data is used)
211
- #external_descr_file = %PATH%
212
-
213
-
214
- ########################
215
- # rosy internal data - please don't change
216
-
217
- # Database access:
218
- # dbtype: type of database, either mysql
219
- # for a MySQL server, or sqlite for SQLite.
220
- #
221
- # if dbtype == mysql, set access parameters:
222
- # host: database server
223
- # user: user name to use
224
- # passwd: password for user
225
- # dbname: database where all Rosy's tables will be stored
226
-
227
- dbtype = mysql
228
- host = localhost
229
- user = shalm
230
- passwd = 12345
231
- dbname = shalm11
232
-
233
- # classifier output columns in the tables all start
234
- # with this prefix
235
- classif_column_name = classif
236
-
237
- # pattern for constructing the names
238
- # of the DB tables with training data (main_table_name)
239
- # and test data (test_table_name)
240
- main_table_name = rosy_<exp_ID>_main
241
- test_table_name = rosy_<exp_ID>_<test_ID>
242
-
243
- # string to use for "no value for this feature"
244
- # as well as "no FE for this instance"
245
- noval = NONE
246
-
247
- # pattern for constructing the names
248
- # of classifier files and classifier output files
249
- classifier_file = classif.<classif>.<group>
250
- classifier_output_file = classout.<classif>.<group>.<dataset>
251
-
252
- # pattern for constructing the names
253
- # of the evaluation file and the evaluation log file
254
- eval_file = eval.<exp_ID>.<step>.<test_ID>
255
- log_file = eval_log.<exp_ID>.<step>.<test_ID>
256
-
257
- # pattern for constructing the names
258
- # of the files with failed parses
259
- failed_file = parsefail.<exp_ID>.<split_ID>.<dataset>
@@ -1,47 +0,0 @@
1
- # -*- encoding: utf-8 -*-
2
-
3
- require 'test/unit'
4
- require 'functional/functional_test_helper'
5
-
6
- class TestFred < Test::Unit::TestCase
7
-
8
- include FunctionalTestHelper
9
-
10
- def setup
11
- @msg = "Fred is doing bad, you've just broken something!"
12
- @test_file = FRED_TEST_FILE
13
- @train_file = FRED_TRAIN_FILE
14
- end
15
-
16
- def test_fred_testing_featurization
17
- create_exp_file(@test_file)
18
- create_exp_file(PRP_TEST_FILE_FRED_STD)
19
- execute("ruby -I lib bin/fred -t featurize -e #{@test_file} -d test")
20
- remove_exp_file(@test_file)
21
- remove_exp_file(PRP_TEST_FILE_FRED_STD)
22
- end
23
-
24
- def test_fred_testing_tests
25
- create_exp_file(@test_file)
26
- create_exp_file(PRP_TEST_FILE_FRED_STD)
27
- execute("ruby -I lib bin/fred -t test -e #{@test_file}")
28
- remove_exp_file(@test_file)
29
- remove_exp_file(PRP_TEST_FILE_FRED_STD)
30
- end
31
-
32
- def test_fred_training_featurization
33
- create_exp_file(@train_file)
34
- create_exp_file(PRP_TRAIN_FILE_FRED_STD)
35
- execute("ruby -I lib bin/fred -t featurize -e #{@train_file} -d train")
36
- remove_exp_file(@train_file)
37
- remove_exp_file(PRP_TRAIN_FILE_FRED_STD)
38
- end
39
-
40
- def test_fred_training_train
41
- create_exp_file(@train_file)
42
- create_exp_file(PRP_TRAIN_FILE_FRED_STD)
43
- execute("ruby -I lib bin/fred -t train -e #{@train_file}")
44
- remove_exp_file(@train_file)
45
- remove_exp_file(PRP_TRAIN_FILE_FRED_STD)
46
- end
47
- end
@@ -1,52 +0,0 @@
1
- # -*- encoding: utf-8 -*-
2
-
3
- require 'test/unit'
4
- require 'functional/functional_test_helper'
5
- #require 'fileutils' # File.delete(), File.rename(), File.symlink()
6
-
7
- class TestFrprep < Test::Unit::TestCase
8
-
9
- include FunctionalTestHelper
10
-
11
- def setup
12
- @msg = "FrPrep is doing bad, you've just broken something!"
13
- @test_file = PRP_TEST_FILE
14
- @train_file = PRP_TRAIN_FILE
15
- @ptb = 'lib/frprep/interfaces/berkeley_interface.rb'
16
- link_berkeley
17
- end
18
-
19
- def teardown
20
- unlink_berkeley
21
- end
22
- def test_frprep_testing
23
- create_exp_file(@test_file)
24
- execute("ruby -I lib bin/frprep -e #{@test_file}")
25
- remove_exp_file(@test_file)
26
- end
27
-
28
- def test_frprep_training
29
- create_exp_file(@train_file)
30
- execute("ruby -I lib bin/frprep -e #{@train_file}")
31
- remove_exp_file(@train_file)
32
- end
33
-
34
- private
35
- # Berkeley Parser takes a long time which is bad for testing.
36
- # We ran it once and reuse the result file in our tests.
37
- # Before every test we link the Berkeley interface to a stub
38
- # with the BP invocation switched off.
39
- def link_berkeley
40
- File.rename(@ptb, "#{@ptb}.bak")
41
- File.symlink(
42
- File.expand_path('test/functional/berkeley_interface.rb.stub'),
43
- File.expand_path(@ptb)
44
- )
45
- end
46
-
47
- # After testing we restore the real interface, so the program remains intact.
48
- def unlink_berkeley
49
- File.delete(@ptb)
50
- File.rename("#{@ptb}.bak", @ptb)
51
- end
52
- end
@@ -1,40 +0,0 @@
1
- # -*- encoding: utf-8 -*-
2
-
3
- require 'test/unit'
4
- require 'functional/functional_test_helper'
5
-
6
- class TestRosy < Test::Unit::TestCase
7
- include FunctionalTestHelper
8
-
9
- def setup
10
- @msg = "Rosy is doing bad, you've just broken something!"
11
- end
12
-
13
- def test_rosy_testing
14
- create_exp_file(ROSY_TEST_FILE)
15
- create_exp_file(PRP_TEST_FILE_ROSY_STD)
16
- execute("ruby -rubygems -I lib bin/rosy -t featurize -e #{ROSY_TEST_FILE} -d test")
17
- execute("ruby -rubygems -I lib bin/rosy -t test -e #{ROSY_TEST_FILE}")
18
- remove_exp_file(ROSY_TEST_FILE)
19
- remove_exp_file(PRP_TEST_FILE_ROSY_STD)
20
- end
21
-
22
- def test_rosy_training
23
- create_exp_file(ROSY_TRAIN_FILE)
24
- create_exp_file(PRP_TRAIN_FILE_ROSY_STD)
25
- execute("ruby -rubygems -I lib bin/rosy -t featurize -e #{ROSY_TRAIN_FILE} -d train")
26
- execute("ruby -rubygems -I lib bin/rosy -t train -e #{ROSY_TRAIN_FILE} -s argrec")
27
- execute("ruby -rubygems -I lib bin/rosy -t train -e #{ROSY_TRAIN_FILE} -s arglab")
28
- remove_exp_file(ROSY_TRAIN_FILE)
29
- remove_exp_file(PRP_TRAIN_FILE_ROSY_STD)
30
- end
31
-
32
- def test_rosy_training_onestep
33
- create_exp_file(ROSY_TRAIN_FILE)
34
- create_exp_file(PRP_TRAIN_FILE_ROSY_STD)
35
- execute("ruby -rubygems -I lib bin/rosy -t featurize -e #{ROSY_TRAIN_FILE} -d train")
36
- execute("ruby -rubygems -I lib bin/rosy -t train -e #{ROSY_TRAIN_FILE} -s onestep")
37
- remove_exp_file(ROSY_TRAIN_FILE)
38
- remove_exp_file(PRP_TRAIN_FILE_ROSY_STD)
39
- end
40
- end