shalmaneser-rosy 1.2.0.rc4 → 1.2.rc5
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +47 -18
- data/bin/rosy +14 -7
- data/lib/rosy/FailedParses.rb +22 -20
- data/lib/rosy/FeatureInfo.rb +35 -31
- data/lib/rosy/GfInduce.rb +132 -130
- data/lib/rosy/GfInduceFeature.rb +86 -68
- data/lib/rosy/InputData.rb +59 -55
- data/lib/rosy/RosyConfusability.rb +47 -40
- data/lib/rosy/RosyEval.rb +55 -55
- data/lib/rosy/RosyFeatureExtractors.rb +295 -290
- data/lib/rosy/RosyFeaturize.rb +54 -67
- data/lib/rosy/RosyInspect.rb +52 -50
- data/lib/rosy/RosyIterator.rb +73 -67
- data/lib/rosy/RosyPhase2FeatureExtractors.rb +48 -48
- data/lib/rosy/RosyPruning.rb +39 -31
- data/lib/rosy/RosyServices.rb +116 -115
- data/lib/rosy/RosySplit.rb +55 -53
- data/lib/rosy/RosyTask.rb +7 -3
- data/lib/rosy/RosyTest.rb +174 -191
- data/lib/rosy/RosyTrain.rb +46 -50
- data/lib/rosy/RosyTrainingTestTable.rb +101 -99
- data/lib/rosy/TargetsMostFrequentFrame.rb +13 -9
- data/lib/rosy/{AbstractFeatureAndExternal.rb → abstract_feature_extractor.rb} +22 -97
- data/lib/rosy/abstract_single_feature_extractor.rb +52 -0
- data/lib/rosy/external_feature_extractor.rb +35 -0
- data/lib/rosy/opt_parser.rb +231 -201
- data/lib/rosy/rosy.rb +63 -64
- data/lib/rosy/rosy_conventions.rb +66 -0
- data/lib/rosy/rosy_error.rb +15 -0
- data/lib/rosy/var_var_restriction.rb +16 -0
- data/lib/shalmaneser/rosy.rb +1 -0
- metadata +26 -19
- data/lib/rosy/ExternalConfigData.rb +0 -58
- data/lib/rosy/View.rb +0 -418
- data/lib/rosy/rosy_config_data.rb +0 -121
- data/test/frprep/test_opt_parser.rb +0 -94
- data/test/functional/functional_test_helper.rb +0 -58
- data/test/functional/test_fred.rb +0 -47
- data/test/functional/test_frprep.rb +0 -99
- data/test/functional/test_rosy.rb +0 -40
@@ -1,9 +1,11 @@
|
|
1
|
+
module Shalmaneser
|
2
|
+
module Rosy
|
1
3
|
module TargetsMostFrequentSc
|
2
|
-
def determine_target_most_frequent_sc(view,
|
3
|
-
noval,
|
4
|
+
def determine_target_most_frequent_sc(view,
|
5
|
+
noval,
|
4
6
|
with_frame_default = nil)
|
5
|
-
target_subcat =
|
6
|
-
frame_subcat =
|
7
|
+
target_subcat = {}
|
8
|
+
frame_subcat = {}
|
7
9
|
|
8
10
|
view.each_sentence { |sentence|
|
9
11
|
|
@@ -35,8 +37,8 @@ module TargetsMostFrequentSc
|
|
35
37
|
} # each sentence of view
|
36
38
|
|
37
39
|
# most frequent subcat for each target:
|
38
|
-
retv =
|
39
|
-
retv2 =
|
40
|
+
retv = {}
|
41
|
+
retv2 = {}
|
40
42
|
[[retv, target_subcat], [retv2, frame_subcat]].each { |out_hash, in_hash|
|
41
43
|
|
42
44
|
in_hash.each_pair { |key, subcats|
|
@@ -53,8 +55,10 @@ module TargetsMostFrequentSc
|
|
53
55
|
end
|
54
56
|
|
55
57
|
def tmf_target_key(instance)
|
56
|
-
return instance["frame"] + "." +
|
57
|
-
instance["target"] + "." +
|
58
|
-
instance["target_pos"]
|
58
|
+
return instance["frame"] + "." +
|
59
|
+
instance["target"] + "." +
|
60
|
+
instance["target_pos"]
|
59
61
|
end
|
60
62
|
end
|
63
|
+
end
|
64
|
+
end
|
@@ -1,11 +1,13 @@
|
|
1
1
|
# Katrin Erk November 05
|
2
|
-
#
|
2
|
+
#
|
3
3
|
# Abstract classes for
|
4
4
|
# - Rosy features
|
5
5
|
# - Rosy interface for external knowledge sources.
|
6
6
|
|
7
|
-
require '
|
7
|
+
require 'configuration/external_config_data'
|
8
8
|
|
9
|
+
module Shalmaneser
|
10
|
+
module Rosy
|
9
11
|
####
|
10
12
|
# Feature Extractor:
|
11
13
|
# computes one or more features for a node (a SynNode object) out of
|
@@ -21,15 +23,15 @@ class AbstractFeatureExtractor
|
|
21
23
|
# returns a string: the designator for this feature extractor
|
22
24
|
# (an extractor may compute several features, but
|
23
25
|
# in the experiment file it is chosen by a single designator)
|
24
|
-
def
|
26
|
+
def self.designator
|
25
27
|
raise "Overwrite me"
|
26
28
|
end
|
27
29
|
|
28
30
|
###
|
29
|
-
# returns an array of feature names, the names of the
|
31
|
+
# returns an array of feature names, the names of the
|
30
32
|
# features that it can compute.
|
31
33
|
# The number of features that the extractor computes must be fixed.
|
32
|
-
def
|
34
|
+
def self.feature_names
|
33
35
|
raise "Overwrite me."
|
34
36
|
end
|
35
37
|
|
@@ -37,12 +39,12 @@ class AbstractFeatureExtractor
|
|
37
39
|
# returns a string: the data type for the feature
|
38
40
|
# to be passed on to the MySQL database,
|
39
41
|
# e.g. VARCHAR(10), INT
|
40
|
-
def
|
42
|
+
def self.sql_type
|
41
43
|
raise "Overwrite me"
|
42
44
|
end
|
43
45
|
|
44
46
|
###
|
45
|
-
# returns a string: the feature type
|
47
|
+
# returns a string: the feature type
|
46
48
|
# (the same for all features computed by this extractor)
|
47
49
|
# possible values:
|
48
50
|
# - gold: gold label
|
@@ -50,7 +52,7 @@ class AbstractFeatureExtractor
|
|
50
52
|
# - syn: feature computed from syntactic characteristics of the instance
|
51
53
|
# - sem: feature involving semantic characteristics of the instance
|
52
54
|
# - sentlevel: this feature is the same for all instances of a sentence
|
53
|
-
def
|
55
|
+
def self.feature_type
|
54
56
|
raise "Overwrite me"
|
55
57
|
end
|
56
58
|
|
@@ -59,19 +61,19 @@ class AbstractFeatureExtractor
|
|
59
61
|
# depending on whether the feature is computed
|
60
62
|
# directly from the SalsaTigerSentence and the SynNode objects
|
61
63
|
# or whether it is computed from the phase 1 features
|
62
|
-
def
|
64
|
+
def self.phase
|
63
65
|
raise "Overwrite me."
|
64
66
|
end
|
65
67
|
|
66
68
|
###
|
67
69
|
# returns an array of strings, providing information about
|
68
70
|
# the feature extractor
|
69
|
-
def
|
70
|
-
|
71
|
+
def self.info
|
72
|
+
[]
|
71
73
|
end
|
72
74
|
|
73
75
|
###
|
74
|
-
# set sentence, set node, set other settings:
|
76
|
+
# set sentence, set node, set other settings:
|
75
77
|
# this is done prior to
|
76
78
|
# feature computation using compute_feature()
|
77
79
|
# such that computations that stay the same for
|
@@ -84,10 +86,11 @@ class AbstractFeatureExtractor
|
|
84
86
|
frame) # FrameNode object
|
85
87
|
@@sent = sent
|
86
88
|
@@frame = frame
|
87
|
-
|
89
|
+
|
88
90
|
return true
|
89
91
|
end
|
90
92
|
|
93
|
+
# @todo Rename and change the return value.
|
91
94
|
def AbstractFeatureExtractor.set_node(node) # SynNode of the sentence set in set_sentence
|
92
95
|
@@node = node
|
93
96
|
|
@@ -101,7 +104,7 @@ class AbstractFeatureExtractor
|
|
101
104
|
# several features can be done in advance
|
102
105
|
def AbstractFeatureExtractor.set(var_hash = {})
|
103
106
|
# no settings at this point
|
104
|
-
|
107
|
+
|
105
108
|
return true
|
106
109
|
end
|
107
110
|
# test during initialisation whether a feature is computable
|
@@ -124,12 +127,12 @@ class AbstractFeatureExtractor
|
|
124
127
|
#
|
125
128
|
# returns an array of features (strings), length the same as the
|
126
129
|
# length of feature_names()
|
127
|
-
def compute_features
|
130
|
+
def compute_features
|
128
131
|
raise "overwrite me"
|
129
132
|
end
|
130
133
|
|
131
134
|
###
|
132
|
-
# phase 2 extractors:
|
135
|
+
# phase 2 extractors:
|
133
136
|
# compute features for a complete view
|
134
137
|
#
|
135
138
|
# returns: an array of columns,
|
@@ -139,7 +142,7 @@ class AbstractFeatureExtractor
|
|
139
142
|
raise "overwrite me"
|
140
143
|
end
|
141
144
|
|
142
|
-
# At this place, we had abstract methods for "training" phase 2 features
|
145
|
+
# At this place, we had abstract methods for "training" phase 2 features
|
143
146
|
# Since this involves introducing a "state" that is nontrivial to preserve
|
144
147
|
# for a standalone version of the classifiers, without keeping the training data,
|
145
148
|
# we decided to remove this functionality (30.11.05).
|
@@ -149,94 +152,16 @@ class AbstractFeatureExtractor
|
|
149
152
|
######
|
150
153
|
protected
|
151
154
|
|
152
|
-
def AbstractFeatureExtractor.announce_me
|
155
|
+
def AbstractFeatureExtractor.announce_me
|
153
156
|
# AB: In 1.9 constants are symbols.
|
154
157
|
if Module.constants.include?("RosyFeatureInfo") or Module.constants.include?(:RosyFeatureInfo)
|
155
158
|
# yup, we have a class to which we can announce ourselves
|
156
|
-
RosyFeatureInfo.add_feature(
|
159
|
+
RosyFeatureInfo.add_feature(self)
|
157
160
|
else
|
158
161
|
# no interface collector class
|
159
162
|
# $stderr.puts "Feature #{self.name()} not announced: no RosyFeatureInfo."
|
160
163
|
end
|
161
164
|
end
|
162
165
|
end
|
163
|
-
|
164
|
-
################################################################
|
165
|
-
# Wrapper class for extractors that compute a single feature
|
166
|
-
class AbstractSingleFeatureExtractor < AbstractFeatureExtractor
|
167
|
-
|
168
|
-
###
|
169
|
-
# returns a string: the designator for this feature extractor
|
170
|
-
# (an extractor may compute several features, but
|
171
|
-
# in the experiment file it is chosen by a single designator)
|
172
|
-
#
|
173
|
-
# here: single feature, and the feature name is the designator
|
174
|
-
def AbstractFeatureExtractor.designator()
|
175
|
-
return eval(self.name()).feature_name()
|
176
|
-
end
|
177
|
-
|
178
|
-
###
|
179
|
-
def AbstractSingleFeatureExtractor.feature_names()
|
180
|
-
return [eval(self.name()).feature_name()]
|
181
|
-
end
|
182
|
-
|
183
|
-
###
|
184
|
-
def compute_features()
|
185
|
-
return [compute_feature()]
|
186
|
-
end
|
187
|
-
|
188
|
-
def compute_features_on_view(view) # DBView object
|
189
|
-
return [compute_feature_on_view(view)]
|
190
|
-
end
|
191
|
-
|
192
|
-
|
193
|
-
######
|
194
|
-
# Single-feature methods
|
195
|
-
|
196
|
-
###
|
197
|
-
def AbstractSingleFeatureExtractor.feature_name()
|
198
|
-
raise "Overwrite me."
|
199
|
-
end
|
200
|
-
|
201
|
-
###
|
202
|
-
def compute_feature()
|
203
|
-
raise "Overwrite me"
|
204
|
-
end
|
205
|
-
|
206
|
-
###
|
207
|
-
def compute_feature_on_view(view) # DBView object
|
208
|
-
raise "Overwrite me"
|
209
|
-
end
|
210
166
|
end
|
211
|
-
|
212
|
-
######################################################
|
213
|
-
|
214
|
-
class ExternalFeatureExtractor < AbstractFeatureExtractor
|
215
|
-
|
216
|
-
@@warning_uttered = false
|
217
|
-
|
218
|
-
####
|
219
|
-
# initialization:
|
220
|
-
#
|
221
|
-
# read experiment file for external interfaces
|
222
|
-
def initialize(exp, # RosyConfigData object
|
223
|
-
interpreter_class)
|
224
|
-
|
225
|
-
@exp_rosy = exp
|
226
|
-
@@interpreter_class = interpreter_class
|
227
|
-
|
228
|
-
unless @exp_rosy.get("external_descr_file")
|
229
|
-
unless @@warning_uttered
|
230
|
-
$stderr.puts "Warning: Cannot compute external feature"
|
231
|
-
$stderr.puts "since 'external_descr_file' has not been set"
|
232
|
-
$stderr.puts "in the Rosy experiment file."
|
233
|
-
@@warning_uttered = true
|
234
|
-
end
|
235
|
-
|
236
|
-
@exp_external = nil
|
237
|
-
return
|
238
|
-
end
|
239
|
-
|
240
|
-
@exp_external = ExternalConfigData.new(@exp_rosy.get("external_descr_file"))
|
241
|
-
end
|
242
167
|
end
|
@@ -0,0 +1,52 @@
|
|
1
|
+
require_relative 'abstract_feature_extractor'
|
2
|
+
|
3
|
+
module Shalmaneser
|
4
|
+
module Rosy
|
5
|
+
################################################################
|
6
|
+
# Wrapper class for extractors that compute a single feature
|
7
|
+
class AbstractSingleFeatureExtractor < AbstractFeatureExtractor
|
8
|
+
|
9
|
+
###
|
10
|
+
# returns a string: the designator for this feature extractor
|
11
|
+
# (an extractor may compute several features, but
|
12
|
+
# in the experiment file it is chosen by a single designator)
|
13
|
+
#
|
14
|
+
# here: single feature, and the feature name is the designator
|
15
|
+
def self.designator
|
16
|
+
feature_name
|
17
|
+
end
|
18
|
+
|
19
|
+
###
|
20
|
+
def self.feature_names
|
21
|
+
[feature_name]
|
22
|
+
end
|
23
|
+
|
24
|
+
###
|
25
|
+
def compute_features
|
26
|
+
[compute_feature]
|
27
|
+
end
|
28
|
+
|
29
|
+
def compute_features_on_view(view) # DBView object
|
30
|
+
[compute_feature_on_view(view)]
|
31
|
+
end
|
32
|
+
|
33
|
+
######
|
34
|
+
# Single-feature methods
|
35
|
+
|
36
|
+
###
|
37
|
+
def self.feature_name
|
38
|
+
raise "Overwrite me."
|
39
|
+
end
|
40
|
+
|
41
|
+
###
|
42
|
+
def compute_feature
|
43
|
+
raise "Overwrite me"
|
44
|
+
end
|
45
|
+
|
46
|
+
###
|
47
|
+
def compute_feature_on_view(view) # DBView object
|
48
|
+
raise "Overwrite me"
|
49
|
+
end
|
50
|
+
end
|
51
|
+
end
|
52
|
+
end
|
@@ -0,0 +1,35 @@
|
|
1
|
+
require_relative 'abstract_feature_extractor'
|
2
|
+
require 'configuration/external_config_data'
|
3
|
+
|
4
|
+
module Shalmaneser
|
5
|
+
module Rosy
|
6
|
+
class ExternalFeatureExtractor < AbstractFeatureExtractor
|
7
|
+
|
8
|
+
@@warning_uttered = false
|
9
|
+
|
10
|
+
####
|
11
|
+
# initialization:
|
12
|
+
#
|
13
|
+
# read experiment file for external interfaces
|
14
|
+
# @param [RosyConfigData] exp object
|
15
|
+
def initialize(exp, interpreter_class)
|
16
|
+
@exp_rosy = exp
|
17
|
+
@@interpreter_class = interpreter_class
|
18
|
+
|
19
|
+
unless @exp_rosy.get("external_descr_file")
|
20
|
+
unless @@warning_uttered
|
21
|
+
$stderr.puts "Warning: Cannot compute external feature"
|
22
|
+
$stderr.puts "since 'external_descr_file' has not been set"
|
23
|
+
$stderr.puts "in the Rosy experiment file."
|
24
|
+
@@warning_uttered = true
|
25
|
+
end
|
26
|
+
|
27
|
+
@exp_external = nil
|
28
|
+
return
|
29
|
+
end
|
30
|
+
|
31
|
+
@exp_external = Shalmaneser::Configuration::ExternalConfigData.new(@exp_rosy.get("external_descr_file"))
|
32
|
+
end
|
33
|
+
end
|
34
|
+
end
|
35
|
+
end
|
data/lib/rosy/opt_parser.rb
CHANGED
@@ -1,148 +1,179 @@
|
|
1
1
|
# -*- coding: utf-8 -*-
|
2
2
|
|
3
3
|
require 'getoptlong'
|
4
|
+
require 'definitions'
|
5
|
+
require 'configuration/rosy_config_data'
|
6
|
+
|
7
|
+
module Shalmaneser
|
8
|
+
module Rosy
|
9
|
+
class OptParser
|
10
|
+
def self.parse(cmd_args)
|
11
|
+
##############################
|
12
|
+
# main starts here
|
13
|
+
##############################
|
14
|
+
|
15
|
+
##
|
16
|
+
# evaluate runtime arguments
|
17
|
+
|
18
|
+
tasks = {
|
19
|
+
"featurize" => [
|
20
|
+
# test table ID, required for test, no default
|
21
|
+
['--testID', '-i', GetoptLong::REQUIRED_ARGUMENT],
|
22
|
+
# set to featurize: 'train' or 'test', no default
|
23
|
+
['--dataset', '-d', GetoptLong::REQUIRED_ARGUMENT],
|
24
|
+
# splitlog ID: if given, featurize this split. Cannot use both this and -d
|
25
|
+
['--logID', '-l', GetoptLong::REQUIRED_ARGUMENT],
|
26
|
+
['--append', '-A', GetoptLong::NO_ARGUMENT]
|
27
|
+
],
|
28
|
+
"split" => [
|
29
|
+
# splitlog ID, required, no default
|
30
|
+
['--logID', '-l', GetoptLong::REQUIRED_ARGUMENT],
|
31
|
+
# percentage training data, default: 90
|
32
|
+
['--trainpercent', '-r', GetoptLong::REQUIRED_ARGUMENT]
|
33
|
+
],
|
34
|
+
"train" => [
|
35
|
+
# splitlog ID; if given, will train on split rather than all of main table
|
36
|
+
['--logID', '-l', GetoptLong::REQUIRED_ARGUMENT],
|
37
|
+
# classification step: 'argrec', 'arglab', 'both' (default) or 'onestep'
|
38
|
+
['--step', '-s', GetoptLong::REQUIRED_ARGUMENT]
|
39
|
+
],
|
40
|
+
"test" => [
|
41
|
+
# classification step: 'argrec', 'arglab', 'both' (default) or 'onestep'
|
42
|
+
['--step', '-s', GetoptLong::REQUIRED_ARGUMENT],
|
43
|
+
# test table ID: if given, test on this table
|
44
|
+
['--testID', '-i', GetoptLong::REQUIRED_ARGUMENT],
|
45
|
+
# splitlog ID: if given, test on this split. Cannot use both this and -i
|
46
|
+
['--logID', '-l', GetoptLong::REQUIRED_ARGUMENT],
|
47
|
+
# set this to prevent output of disambiguated test data
|
48
|
+
['--nooutput', '-N', GetoptLong::NO_ARGUMENT]
|
49
|
+
],
|
50
|
+
"eval" => [
|
51
|
+
# classification step: 'argrec', 'arglab', 'both' (default) or 'onestep'
|
52
|
+
['--step', '-s', GetoptLong::REQUIRED_ARGUMENT],
|
53
|
+
# test table ID: if given, test on this table
|
54
|
+
['--testID', '-i', GetoptLong::REQUIRED_ARGUMENT],
|
55
|
+
['--logID', '-l', GetoptLong::REQUIRED_ARGUMENT]
|
56
|
+
],
|
57
|
+
"inspect" => [
|
58
|
+
# describe all tables
|
59
|
+
['--tables', GetoptLong::NO_ARGUMENT],
|
60
|
+
# describe table contents for current experiment
|
61
|
+
['--tablecont', GetoptLong::OPTIONAL_ARGUMENT],
|
62
|
+
# test table ID: if given, describe contents of this table
|
63
|
+
['--testID', '-i', GetoptLong::REQUIRED_ARGUMENT],
|
64
|
+
# describe classification runs for current experiment
|
65
|
+
['--runs', GetoptLong::NO_ARGUMENT],
|
66
|
+
# list sentence IDs for given splitlog
|
67
|
+
['--split', GetoptLong::REQUIRED_ARGUMENT]
|
68
|
+
],
|
69
|
+
"services" => [
|
70
|
+
# delete database table
|
71
|
+
['--deltable', GetoptLong::REQUIRED_ARGUMENT],
|
72
|
+
# delete experiment tables and files
|
73
|
+
['--delexp', GetoptLong::NO_ARGUMENT],
|
74
|
+
# delete tables interactively
|
75
|
+
['--deltables', GetoptLong::NO_ARGUMENT],
|
76
|
+
# delete runs
|
77
|
+
['--delruns', GetoptLong::NO_ARGUMENT],
|
78
|
+
# delete split
|
79
|
+
['--delsplit', GetoptLong::REQUIRED_ARGUMENT],
|
80
|
+
# dump experiment to files
|
81
|
+
['--dump', GetoptLong::OPTIONAL_ARGUMENT],
|
82
|
+
# load experiment from files
|
83
|
+
['--load', GetoptLong::OPTIONAL_ARGUMENT],
|
84
|
+
# write feature files
|
85
|
+
['--writefeatures', GetoptLong::OPTIONAL_ARGUMENT],
|
86
|
+
# classification step: 'argrec', 'arglab', 'both' (default) or 'onestep'
|
87
|
+
['--step', '-s', GetoptLong::REQUIRED_ARGUMENT],
|
88
|
+
# test table ID: if given, test on this table
|
89
|
+
['--testID', '-i', GetoptLong::REQUIRED_ARGUMENT],
|
90
|
+
# splitlog ID: if given, test on this split. Cannot use both this and -i
|
91
|
+
['--logID', '-l', GetoptLong::REQUIRED_ARGUMENT]
|
92
|
+
]
|
93
|
+
}
|
94
|
+
|
95
|
+
optnames = [
|
96
|
+
# get help
|
97
|
+
['--help', '-h', GetoptLong::NO_ARGUMENT],
|
98
|
+
# experiment file name (and path), no default
|
99
|
+
['--expfile', '-e', GetoptLong::REQUIRED_ARGUMENT],
|
100
|
+
# task to perform: one of task.keys, no default
|
101
|
+
['--task', '-t', GetoptLong::REQUIRED_ARGUMENT]
|
102
|
+
]
|
103
|
+
|
104
|
+
tasks.values.each { |more_optnames| optnames.concat more_optnames }
|
105
|
+
|
106
|
+
optnames.uniq!
|
107
|
+
|
108
|
+
begin
|
109
|
+
opts = GetoptLong.new(*optnames)
|
110
|
+
rescue => e
|
111
|
+
$stderr.puts "Error: unknown command line option: #{e.message}!"
|
112
|
+
exit 1
|
113
|
+
end
|
4
114
|
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
],
|
36
|
-
"eval" => [['--step', '-s', GetoptLong::REQUIRED_ARGUMENT], # classification step: 'argrec', 'arglab', 'both' (default) or 'onestep'
|
37
|
-
[ '--testID', '-i', GetoptLong::REQUIRED_ARGUMENT], # test table ID: if given, test on this table
|
38
|
-
['--logID', '-l', GetoptLong::REQUIRED_ARGUMENT]
|
39
|
-
],
|
40
|
-
"inspect" => [['--tables', GetoptLong::NO_ARGUMENT], # describe all tables
|
41
|
-
[ '--tablecont', GetoptLong::OPTIONAL_ARGUMENT], # describe table contents for current experiment
|
42
|
-
[ '--testID', '-i', GetoptLong::REQUIRED_ARGUMENT], # test table ID: if given, describe contents of this table
|
43
|
-
[ '--runs', GetoptLong::NO_ARGUMENT], # describe classification runs for current experiment
|
44
|
-
[ '--split', GetoptLong::REQUIRED_ARGUMENT] # list sentence IDs for given splitlog
|
45
|
-
],
|
46
|
-
"services" => [['--deltable', GetoptLong::REQUIRED_ARGUMENT], # delete database table
|
47
|
-
[ '--delexp', GetoptLong::NO_ARGUMENT], # delete experiment tables and files
|
48
|
-
[ '--deltables', GetoptLong::NO_ARGUMENT], # delete tables interactively
|
49
|
-
[ '--delruns', GetoptLong::NO_ARGUMENT], # delete runs
|
50
|
-
[ '--delsplit', GetoptLong::REQUIRED_ARGUMENT], # delete split
|
51
|
-
[ '--dump', GetoptLong::OPTIONAL_ARGUMENT], # dump experiment to files
|
52
|
-
[ '--load', GetoptLong::OPTIONAL_ARGUMENT], # load experiment from files
|
53
|
-
[ '--writefeatures', GetoptLong::OPTIONAL_ARGUMENT], # write feature files
|
54
|
-
['--step', '-s', GetoptLong::REQUIRED_ARGUMENT], # classification step: 'argrec', 'arglab', 'both' (default) or 'onestep'
|
55
|
-
[ '--testID', '-i', GetoptLong::REQUIRED_ARGUMENT], # test table ID: if given, test on this table
|
56
|
-
['--logID', '-l', GetoptLong::REQUIRED_ARGUMENT] # splitlog ID: if given, test on this split. Cannot use both this and -i
|
57
|
-
]
|
58
|
-
}
|
59
|
-
|
60
|
-
optnames = [[ '--help', '-h', GetoptLong::NO_ARGUMENT], # get help
|
61
|
-
[ '--expfile', '-e', GetoptLong::REQUIRED_ARGUMENT], # experiment file name (and path), no default
|
62
|
-
[ '--task', '-t', GetoptLong::REQUIRED_ARGUMENT ] # task to perform: one of task.keys, no default
|
63
|
-
]
|
64
|
-
|
65
|
-
tasks.values.each { |more_optnames|
|
66
|
-
optnames.concat more_optnames
|
67
|
-
}
|
68
|
-
|
69
|
-
optnames.uniq!
|
70
|
-
|
71
|
-
# asterisk: "explode" array into individual parameters
|
72
|
-
begin
|
73
|
-
opts = options_hash(GetoptLong.new(*optnames))
|
74
|
-
rescue
|
75
|
-
$stderr.puts "Error: unknown command line option: " + $!
|
76
|
-
exit 1
|
77
|
-
end
|
78
|
-
|
79
|
-
experiment_filename = nil
|
80
|
-
|
81
|
-
##
|
82
|
-
# are we being asked for help?
|
83
|
-
if opts['--help']
|
84
|
-
help()
|
85
|
-
exit(0)
|
86
|
-
end
|
87
|
-
|
88
|
-
##
|
89
|
-
# now find the task
|
90
|
-
task = opts['--task']
|
91
|
-
# sanity checks for task
|
92
|
-
if task.nil?
|
93
|
-
help()
|
94
|
-
exit(0)
|
95
|
-
end
|
96
|
-
unless tasks.keys.include? task
|
97
|
-
$stderr.puts "Sorry, I don't know the task '#{task}'. Do 'ruby rosy.rb -h' for a list of tasks."
|
98
|
-
exit 1
|
99
|
-
end
|
100
|
-
|
101
|
-
##
|
102
|
-
# now evaluate the rest of the options
|
103
|
-
opts.each_pair { |opt,arg|
|
104
|
-
case opt
|
105
|
-
when '--help', '--task'
|
115
|
+
experiment_filename = nil
|
116
|
+
|
117
|
+
opts = options_hash(opts)
|
118
|
+
##
|
119
|
+
# are we being asked for help?
|
120
|
+
# @ todo work with the empty case
|
121
|
+
if opts['--help']
|
122
|
+
help
|
123
|
+
exit(0)
|
124
|
+
end
|
125
|
+
|
126
|
+
##
|
127
|
+
# now find the task
|
128
|
+
task = opts['--task']
|
129
|
+
# sanity checks for task
|
130
|
+
if task.nil?
|
131
|
+
help
|
132
|
+
exit(0)
|
133
|
+
end
|
134
|
+
|
135
|
+
unless tasks.keys.include?(task)
|
136
|
+
$stderr.puts "Sorry, I don't know the task '#{task}'. Do 'rosy -h' for a list of tasks."
|
137
|
+
exit 1
|
138
|
+
end
|
139
|
+
|
140
|
+
##
|
141
|
+
# now evaluate the rest of the options
|
142
|
+
opts.each_pair do |opt, arg|
|
143
|
+
case opt
|
144
|
+
when '--help', '--task'
|
106
145
|
# we already handled this
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
146
|
+
when '--expfile'
|
147
|
+
experiment_filename = arg
|
148
|
+
else
|
149
|
+
# do we know this option?
|
150
|
+
unless tasks[task].assoc(opt)
|
151
|
+
$stderr.puts "Sorry, I don't know the option " + opt + " for task " + task
|
152
|
+
$stderr.puts "Do 'ruby rosy.rb -h' for a list of tasks and options."
|
153
|
+
exit 1
|
154
|
+
end
|
115
155
|
end
|
116
156
|
end
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
|
122
|
-
|
123
|
-
|
124
|
-
|
125
|
-
|
126
|
-
|
127
|
-
|
128
|
-
|
129
|
-
|
130
|
-
# sanity checks
|
131
|
-
unless exp.get("experiment_ID") =~ /^[A-Za-z0-9_]+$/
|
132
|
-
$stderr.puts "Please choose an experiment ID consisting only of the letters A-Za-z0-9_."
|
133
|
-
exit 1
|
157
|
+
|
158
|
+
# @todo This case is irreal since this restriction is set by the parser itself.
|
159
|
+
if experiment_filename.nil?
|
160
|
+
$stderr.puts "I need an experiment file name, option --expfile|-e"
|
161
|
+
exit 1
|
162
|
+
end
|
163
|
+
|
164
|
+
##
|
165
|
+
# open config file
|
166
|
+
|
167
|
+
exp = ::Shalmaneser::Configuration::RosyConfigData.new(experiment_filename)
|
168
|
+
|
169
|
+
[exp, opts]
|
134
170
|
end
|
135
|
-
|
136
|
-
|
137
|
-
|
138
|
-
|
139
|
-
|
140
|
-
|
141
|
-
|
142
|
-
private
|
143
|
-
def self.help
|
144
|
-
$stderr.puts "
|
145
|
-
ROSY: semantic ROle assignment SYstem Version 0.2
|
171
|
+
|
172
|
+
private
|
173
|
+
|
174
|
+
def self.help
|
175
|
+
$stderr.puts "
|
176
|
+
ROSY: semantic ROle assignment SYstem, Version #{VERSION}
|
146
177
|
|
147
178
|
Usage:
|
148
179
|
|
@@ -150,18 +181,18 @@ ruby rosy.rb --help|-h
|
|
150
181
|
|
151
182
|
gets you this help text.
|
152
183
|
|
153
|
-
ruby rosy.rb --task|-t featurize --expfile|-e <e>
|
154
|
-
[--dataset|-d <d>] [--testID|-i <i>]
|
184
|
+
ruby rosy.rb --task|-t featurize --expfile|-e <e>
|
185
|
+
[--dataset|-d <d>] [--testID|-i <i>]
|
155
186
|
[--logID|-l <l> ] [--append|-A]
|
156
187
|
featurizes input data and stores it in a database.
|
157
188
|
Enduser mode: dataset has to be 'test' (preset as default),
|
158
189
|
no --append.
|
159
190
|
|
160
|
-
--expfile <e> Use <e> as the experiment description and
|
191
|
+
--expfile <e> Use <e> as the experiment description and
|
161
192
|
configuration file
|
162
193
|
|
163
|
-
--dataset <d> Set to featurize: <d> is either 'train'
|
164
|
-
(put data into main table) or 'test' (put data
|
194
|
+
--dataset <d> Set to featurize: <d> is either 'train'
|
195
|
+
(put data into main table) or 'test' (put data
|
165
196
|
into separate test table with ID given using --testID)
|
166
197
|
Use at least one of --logID, --dataset.
|
167
198
|
|
@@ -171,15 +202,15 @@ ruby rosy.rb --task|-t featurize --expfile|-e <e>
|
|
171
202
|
Use at least one of --logID, --dataset.
|
172
203
|
|
173
204
|
--testID <i> Use <i> as the ID for the table to store the test data.
|
174
|
-
necessary only with '--dataset test'. default: #{default_test_ID
|
205
|
+
necessary only with '--dataset test'. default: #{::Rosy.default_test_ID}.
|
175
206
|
|
176
|
-
--append Do not overwrite previously computed features
|
207
|
+
--append Do not overwrite previously computed features
|
177
208
|
for this experiment.
|
178
|
-
Rather, append the new features
|
209
|
+
Rather, append the new features
|
179
210
|
to the old featurization files.
|
180
211
|
Default: overwrite
|
181
212
|
|
182
|
-
ruby rosy.rb --task|-t split --expfile|-e <f> --logID|-l <l>
|
213
|
+
ruby rosy.rb --task|-t split --expfile|-e <f> --logID|-l <l>
|
183
214
|
[--trainpercent|-r <r>]
|
184
215
|
produces a new train/test split on the main table of the experiment.
|
185
216
|
Not available in enduser mode.
|
@@ -188,7 +219,7 @@ ruby rosy.rb --task|-t split --expfile|-e <f> --logID|-l <l>
|
|
188
219
|
|
189
220
|
--logID <l> Use <l> as the ID for storing this new split
|
190
221
|
|
191
|
-
--trainpercent <r> Allocate <r> percent of the data as train,
|
222
|
+
--trainpercent <r> Allocate <r> percent of the data as train,
|
192
223
|
and 100-<r> as test
|
193
224
|
default: <r>=90
|
194
225
|
|
@@ -200,20 +231,20 @@ ruby rosy.rb --task|-t train --expfile|-e <f> [--step|-s <s>] [--logID|-l <l>]
|
|
200
231
|
--expfile <f> Use <f> as the experiment description and configuration file
|
201
232
|
|
202
233
|
--step <s> What kind of classifier(s) to train?
|
203
|
-
<s>=argrec: argument recognition,
|
234
|
+
<s>=argrec: argument recognition,
|
204
235
|
distinguish role from nonrole
|
205
|
-
<s>=arglab: argument labeling, naming roles,
|
236
|
+
<s>=arglab: argument labeling, naming roles,
|
206
237
|
builds on argrec
|
207
238
|
<s>=both: first argrec, then arglab
|
208
239
|
<s>=onestep: do argument labeling right away without
|
209
240
|
prior filtering of non-arguments
|
210
241
|
default: both
|
211
242
|
|
212
|
-
--logID <l> If given, train on this split of the main table rather than
|
243
|
+
--logID <l> If given, train on this split of the main table rather than
|
213
244
|
the whole main table
|
214
245
|
|
215
246
|
|
216
|
-
ruby rosy.rb --task|-t test --expfile|-e <f> [--step|-s <s>]
|
247
|
+
ruby rosy.rb --task|-t test --expfile|-e <f> [--step|-s <s>]
|
217
248
|
[--logID|-l <l> | --testID|-i <i>] [--nooutput|-N]
|
218
249
|
apply classifier(s) on data from a test table, or a main table split
|
219
250
|
Enduser mode: only -s both, -s onestep available. Cleanup: Database with
|
@@ -222,12 +253,12 @@ ruby rosy.rb --task|-t test --expfile|-e <f> [--step|-s <s>]
|
|
222
253
|
--expfile <f> Use <f> as the experiment description and configuration file
|
223
254
|
|
224
255
|
--step <s> What kind of classifier(s) to use for testing?
|
225
|
-
<s>=argrec: argument recognition,
|
256
|
+
<s>=argrec: argument recognition,
|
226
257
|
distinguish role from nonrole
|
227
|
-
<s>=arglab: argument labeling, naming roles,
|
258
|
+
<s>=arglab: argument labeling, naming roles,
|
228
259
|
builds on argrec
|
229
260
|
<s>=both: first argrec, then arglab
|
230
|
-
<s>=onestep: do argument labeling right away without
|
261
|
+
<s>=onestep: do argument labeling right away without
|
231
262
|
prior filtering of non-arguments
|
232
263
|
default: both
|
233
264
|
--logID <l> If given, test on this split of the main table
|
@@ -235,31 +266,31 @@ ruby rosy.rb --task|-t test --expfile|-e <f> [--step|-s <s>]
|
|
235
266
|
--testID <i> If given, test on this test table.
|
236
267
|
(Use either this option or -l)
|
237
268
|
|
238
|
-
--nooutput Do not produce an output of the disambiguated test data
|
269
|
+
--nooutput Do not produce an output of the disambiguated test data
|
239
270
|
in SalsaTigerXML format. This is useful if you just want
|
240
271
|
to evaluate the system.
|
241
272
|
Default: output is produced.
|
242
273
|
|
243
274
|
|
244
|
-
ruby rosy.rb --task|-t eval --expfile|-e <f> [--step|-s <s>]
|
245
|
-
[--logID|-l <l> | --testID|-i <i>
|
275
|
+
ruby rosy.rb --task|-t eval --expfile|-e <f> [--step|-s <s>]
|
276
|
+
[--logID|-l <l> | --testID|-i <i>
|
246
277
|
evaluate the classification results.
|
247
278
|
Not available in enduser mode.
|
248
279
|
|
249
280
|
--expfile <f> Use <f> as the experiment description and configuration file
|
250
281
|
|
251
282
|
--step <s> Evaluate results of which classification step?
|
252
|
-
<s>=argrec: argument recognition,
|
283
|
+
<s>=argrec: argument recognition,
|
253
284
|
distinguish role from nonrole
|
254
|
-
<s>=arglab: argument labeling, naming roles,
|
285
|
+
<s>=arglab: argument labeling, naming roles,
|
255
286
|
builds on argrec
|
256
287
|
<s>=both: first argrec, then arglab
|
257
|
-
<s>=onestep: do argument labeling right away without
|
288
|
+
<s>=onestep: do argument labeling right away without
|
258
289
|
prior filtering of non-arguments
|
259
290
|
default: both
|
260
291
|
Need not be given if --runID is given.
|
261
292
|
|
262
|
-
--logID <l> If given, evaluate on the test data from this split of
|
293
|
+
--logID <l> If given, evaluate on the test data from this split of
|
263
294
|
the main table.
|
264
295
|
(use either this option or -i or -R)
|
265
296
|
|
@@ -267,29 +298,29 @@ ruby rosy.rb --task|-t eval --expfile|-e <f> [--step|-s <s>]
|
|
267
298
|
(Use either this option or -l or -R)
|
268
299
|
|
269
300
|
|
270
|
-
ruby rosy.rb --task|-t inspect --expfile|-e <f> [--tables] [--runs]
|
301
|
+
ruby rosy.rb --task|-t inspect --expfile|-e <f> [--tables] [--runs]
|
271
302
|
[--tablecont [N]] [--testID|-i <i>] [--split <l>]
|
272
|
-
inspect system-internal data, both global and pertaining to the current
|
303
|
+
inspect system-internal data, both global and pertaining to the current
|
273
304
|
experiment.
|
274
|
-
If no options are chosen, an overview of the current experiment
|
305
|
+
If no options are chosen, an overview of the current experiment
|
275
306
|
is given.
|
276
307
|
|
277
|
-
--expfile <f> Use <f> as the experiment description and
|
308
|
+
--expfile <f> Use <f> as the experiment description and
|
278
309
|
configuration file
|
279
310
|
|
280
311
|
--tables Lists all tables of the DB: table name,column names
|
281
312
|
|
282
|
-
--tablecont [N|id:N] Lists the training instances (as feature vectors)
|
313
|
+
--tablecont [N|id:N] Lists the training instances (as feature vectors)
|
283
314
|
of the current experiment.
|
284
315
|
If test ID is given, test instances are listed as well.
|
285
316
|
The optional argument may have one of two forms:
|
286
|
-
- It may be a number N. Then only the N first lines
|
317
|
+
- It may be a number N. Then only the N first lines
|
287
318
|
of each set are listed.
|
288
319
|
- It may be a pair id:N. Then only the N first lines of
|
289
320
|
the DB table with ID id are listed. To list all lines
|
290
321
|
of a single DB table, use id:
|
291
322
|
|
292
|
-
--testID <i> If given, --tablecont also lists the feature vectors for
|
323
|
+
--testID <i> If given, --tablecont also lists the feature vectors for
|
293
324
|
this test table
|
294
325
|
|
295
326
|
--runs List all classification runs of the current experiment
|
@@ -298,29 +329,29 @@ ruby rosy.rb --task|-t inspect --expfile|-e <f> [--tables] [--runs]
|
|
298
329
|
|
299
330
|
ruby rosy.rb --task|-t services --expfile|-e <f> [--deltable <t>]
|
300
331
|
[--delexp] [--dump [<D>]] [--load [<D>]] [--delrun <R>]
|
301
|
-
[--delsplit <l>] [--writefeatures [<D>]]
|
302
|
-
[--step|-s <s>] [--testID|-i <i>] [--logID|-l <l> ]
|
332
|
+
[--delsplit <l>] [--writefeatures [<D>]]
|
333
|
+
[--step|-s <s>] [--testID|-i <i>] [--logID|-l <l> ]
|
303
334
|
diverse services.
|
304
335
|
The --del* services are not available in enduser mode.
|
305
336
|
|
306
337
|
--dump [<D>] Dump the database tables for the current experiment file.
|
307
338
|
If a directory <D> is given, the tables are written there,
|
308
|
-
otherwise they are written to
|
309
|
-
data_dir/<experiment_ID>/tables, where data_dir is the
|
339
|
+
otherwise they are written to
|
340
|
+
data_dir/<experiment_ID>/tables, where data_dir is the
|
310
341
|
data directory given in the experiment file.
|
311
342
|
No existing files in the directory are removed.
|
312
343
|
|
313
344
|
--load [<D>] Construct new database tables from the files in
|
314
|
-
the directory <D>, if it is given, otherwise from
|
315
|
-
data_dir/<experiment_id>/tables, where data_dir
|
345
|
+
the directory <D>, if it is given, otherwise from
|
346
|
+
data_dir/<experiment_id>/tables, where data_dir
|
316
347
|
is the data directory given in the experiment file.
|
317
|
-
Warning: Database tables are loaded into the
|
348
|
+
Warning: Database tables are loaded into the
|
318
349
|
current experiment, the one described in the
|
319
350
|
experiment file. Existing data in tables with
|
320
351
|
the same names is overwritten!
|
321
352
|
|
322
353
|
--deltable <t> Remove database table <t>
|
323
|
-
|
354
|
+
|
324
355
|
--deltables Presents all tables in the database for interactive deletion
|
325
356
|
|
326
357
|
--delexp Remove the experiment described in the given experiment file,
|
@@ -337,14 +368,14 @@ ruby rosy.rb --task|-t services --expfile|-e <f> [--deltable <t>]
|
|
337
368
|
system. If <D> is not given, feature files are written
|
338
369
|
to data_dir/<experiment_id>/your_feature_files/.
|
339
370
|
|
340
|
-
Uses the parameters --step, --testID, --logID to
|
371
|
+
Uses the parameters --step, --testID, --logID to
|
341
372
|
determine which feature files will be written.
|
342
373
|
|
343
374
|
--step <s> Use with --writefeatures: task for which to write features.
|
344
|
-
<s>=argrec: argument recognition,
|
375
|
+
<s>=argrec: argument recognition,
|
345
376
|
distinguish role from nonrole
|
346
377
|
<s>=arglab: argument labeling, naming roles
|
347
|
-
<s>=onestep: do argument labeling right away without
|
378
|
+
<s>=onestep: do argument labeling right away without
|
348
379
|
prior filtering of non-arguments
|
349
380
|
default: onestep.
|
350
381
|
|
@@ -352,28 +383,27 @@ ruby rosy.rb --task|-t services --expfile|-e <f> [--deltable <t>]
|
|
352
383
|
for the split with ID <l>.
|
353
384
|
|
354
385
|
--testID <i> Use with --writefeatures: write features
|
355
|
-
for the test set with ID <i>.
|
356
|
-
default: #{default_test_ID
|
386
|
+
for the test set with ID <i>.
|
387
|
+
default: #{::Shalmaneser::Rosy.default_test_ID}.
|
357
388
|
"
|
389
|
+
end
|
358
390
|
|
359
|
-
|
360
|
-
|
361
|
-
|
362
|
-
|
363
|
-
|
364
|
-
|
365
|
-
|
366
|
-
|
367
|
-
|
368
|
-
|
369
|
-
|
370
|
-
|
371
|
-
|
391
|
+
###
|
392
|
+
# options_hash:
|
393
|
+
#
|
394
|
+
# GetoptLong only allows you to access options via each(),
|
395
|
+
# not individually, and it only allows you to cycle through the options once.
|
396
|
+
# So we re-code the options as a hash
|
397
|
+
def self.options_hash(opts_obj) # GetoptLong object
|
398
|
+
opt_hash = {}
|
399
|
+
|
400
|
+
opts_obj.each do |opt, arg|
|
401
|
+
opt_hash[opt] = arg
|
402
|
+
end
|
403
|
+
|
404
|
+
opt_hash
|
372
405
|
end
|
373
|
-
|
374
|
-
|
375
|
-
|
376
|
-
|
377
|
-
end # class OptParser
|
378
|
-
|
379
|
-
end # module Rosy
|
406
|
+
|
407
|
+
end # class OptParser
|
408
|
+
end # module Rosy
|
409
|
+
end
|