shalmaneser-rosy 1.2.0.rc4 → 1.2.rc5

Sign up to get free protection for your applications and to get access to all the features.
Files changed (41)
  1. checksums.yaml +4 -4
  2. data/README.md +47 -18
  3. data/bin/rosy +14 -7
  4. data/lib/rosy/FailedParses.rb +22 -20
  5. data/lib/rosy/FeatureInfo.rb +35 -31
  6. data/lib/rosy/GfInduce.rb +132 -130
  7. data/lib/rosy/GfInduceFeature.rb +86 -68
  8. data/lib/rosy/InputData.rb +59 -55
  9. data/lib/rosy/RosyConfusability.rb +47 -40
  10. data/lib/rosy/RosyEval.rb +55 -55
  11. data/lib/rosy/RosyFeatureExtractors.rb +295 -290
  12. data/lib/rosy/RosyFeaturize.rb +54 -67
  13. data/lib/rosy/RosyInspect.rb +52 -50
  14. data/lib/rosy/RosyIterator.rb +73 -67
  15. data/lib/rosy/RosyPhase2FeatureExtractors.rb +48 -48
  16. data/lib/rosy/RosyPruning.rb +39 -31
  17. data/lib/rosy/RosyServices.rb +116 -115
  18. data/lib/rosy/RosySplit.rb +55 -53
  19. data/lib/rosy/RosyTask.rb +7 -3
  20. data/lib/rosy/RosyTest.rb +174 -191
  21. data/lib/rosy/RosyTrain.rb +46 -50
  22. data/lib/rosy/RosyTrainingTestTable.rb +101 -99
  23. data/lib/rosy/TargetsMostFrequentFrame.rb +13 -9
  24. data/lib/rosy/{AbstractFeatureAndExternal.rb → abstract_feature_extractor.rb} +22 -97
  25. data/lib/rosy/abstract_single_feature_extractor.rb +52 -0
  26. data/lib/rosy/external_feature_extractor.rb +35 -0
  27. data/lib/rosy/opt_parser.rb +231 -201
  28. data/lib/rosy/rosy.rb +63 -64
  29. data/lib/rosy/rosy_conventions.rb +66 -0
  30. data/lib/rosy/rosy_error.rb +15 -0
  31. data/lib/rosy/var_var_restriction.rb +16 -0
  32. data/lib/shalmaneser/rosy.rb +1 -0
  33. metadata +26 -19
  34. data/lib/rosy/ExternalConfigData.rb +0 -58
  35. data/lib/rosy/View.rb +0 -418
  36. data/lib/rosy/rosy_config_data.rb +0 -121
  37. data/test/frprep/test_opt_parser.rb +0 -94
  38. data/test/functional/functional_test_helper.rb +0 -58
  39. data/test/functional/test_fred.rb +0 -47
  40. data/test/functional/test_frprep.rb +0 -99
  41. data/test/functional/test_rosy.rb +0 -40
@@ -1,9 +1,11 @@
1
+ module Shalmaneser
2
+ module Rosy
1
3
  module TargetsMostFrequentSc
2
- def determine_target_most_frequent_sc(view,
3
- noval,
4
+ def determine_target_most_frequent_sc(view,
5
+ noval,
4
6
  with_frame_default = nil)
5
- target_subcat = Hash.new()
6
- frame_subcat = Hash.new()
7
+ target_subcat = {}
8
+ frame_subcat = {}
7
9
 
8
10
  view.each_sentence { |sentence|
9
11
 
@@ -35,8 +37,8 @@ module TargetsMostFrequentSc
35
37
  } # each sentence of view
36
38
 
37
39
  # most frequent subcat for each target:
38
- retv = Hash.new()
39
- retv2 = Hash.new()
40
+ retv = {}
41
+ retv2 = {}
40
42
  [[retv, target_subcat], [retv2, frame_subcat]].each { |out_hash, in_hash|
41
43
 
42
44
  in_hash.each_pair { |key, subcats|
@@ -53,8 +55,10 @@ module TargetsMostFrequentSc
53
55
  end
54
56
 
55
57
  def tmf_target_key(instance)
56
- return instance["frame"] + "." +
57
- instance["target"] + "." +
58
- instance["target_pos"]
58
+ return instance["frame"] + "." +
59
+ instance["target"] + "." +
60
+ instance["target_pos"]
59
61
  end
60
62
  end
63
+ end
64
+ end
@@ -1,11 +1,13 @@
1
1
  # Katrin Erk November 05
2
- #
2
+ #
3
3
  # Abstract classes for
4
4
  # - Rosy features
5
5
  # - Rosy interface for external knowledge sources.
6
6
 
7
- require 'rosy/ExternalConfigData'
7
+ require 'configuration/external_config_data'
8
8
 
9
+ module Shalmaneser
10
+ module Rosy
9
11
  ####
10
12
  # Feature Extractor:
11
13
  # computes one or more features for a node (a SynNode object) out of
@@ -21,15 +23,15 @@ class AbstractFeatureExtractor
21
23
  # returns a string: the designator for this feature extractor
22
24
  # (an extractor may compute several features, but
23
25
  # in the experiment file it is chosen by a single designator)
24
- def AbstractFeatureExtractor.designator()
26
+ def self.designator
25
27
  raise "Overwrite me"
26
28
  end
27
29
 
28
30
  ###
29
- # returns an array of feature names, the names of the
31
+ # returns an array of feature names, the names of the
30
32
  # features that it can compute.
31
33
  # The number of features that the extractor computes must be fixed.
32
- def AbstractFeatureExtractor.feature_names()
34
+ def self.feature_names
33
35
  raise "Overwrite me."
34
36
  end
35
37
 
@@ -37,12 +39,12 @@ class AbstractFeatureExtractor
37
39
  # returns a string: the data type for the feature
38
40
  # to be passed on to the MySQL database,
39
41
  # e.g. VARCHAR(10), INT
40
- def AbstractFeatureExtractor.sql_type()
42
+ def self.sql_type
41
43
  raise "Overwrite me"
42
44
  end
43
45
 
44
46
  ###
45
- # returns a string: the feature type
47
+ # returns a string: the feature type
46
48
  # (the same for all features computed by this extractor)
47
49
  # possible values:
48
50
  # - gold: gold label
@@ -50,7 +52,7 @@ class AbstractFeatureExtractor
50
52
  # - syn: feature computed from syntactic characteristics of the instance
51
53
  # - sem: feature involving semantic characteristics of the instance
52
54
  # - sentlevel: this feature is the same for all instances of a sentence
53
- def AbstractFeatureExtractor.feature_type()
55
+ def self.feature_type
54
56
  raise "Overwrite me"
55
57
  end
56
58
 
@@ -59,19 +61,19 @@ class AbstractFeatureExtractor
59
61
  # depending on whether the feature is computed
60
62
  # directly from the SalsaTigerSentence and the SynNode objects
61
63
  # or whether it is computed from the phase 1 features
62
- def AbstractFeatureExtractor.phase()
64
+ def self.phase
63
65
  raise "Overwrite me."
64
66
  end
65
67
 
66
68
  ###
67
69
  # returns an array of strings, providing information about
68
70
  # the feature extractor
69
- def AbstractFeatureExtractor.info()
70
- return []
71
+ def self.info
72
+ []
71
73
  end
72
74
 
73
75
  ###
74
- # set sentence, set node, set other settings:
76
+ # set sentence, set node, set other settings:
75
77
  # this is done prior to
76
78
  # feature computation using compute_feature()
77
79
  # such that computations that stay the same for
@@ -84,10 +86,11 @@ class AbstractFeatureExtractor
84
86
  frame) # FrameNode object
85
87
  @@sent = sent
86
88
  @@frame = frame
87
-
89
+
88
90
  return true
89
91
  end
90
92
 
93
+ # @todo Rename and change the return value.
91
94
  def AbstractFeatureExtractor.set_node(node) # SynNode of the sentence set in set_sentence
92
95
  @@node = node
93
96
 
@@ -101,7 +104,7 @@ class AbstractFeatureExtractor
101
104
  # several features can be done in advance
102
105
  def AbstractFeatureExtractor.set(var_hash = {})
103
106
  # no settings at this point
104
-
107
+
105
108
  return true
106
109
  end
107
110
  # test during initialisation whether a feature is computable
@@ -124,12 +127,12 @@ class AbstractFeatureExtractor
124
127
  #
125
128
  # returns an array of features (strings), length the same as the
126
129
  # length of feature_names()
127
- def compute_features()
130
+ def compute_features
128
131
  raise "overwrite me"
129
132
  end
130
133
 
131
134
  ###
132
- # phase 2 extractors:
135
+ # phase 2 extractors:
133
136
  # compute features for a complete view
134
137
  #
135
138
  # returns: an array of columns,
@@ -139,7 +142,7 @@ class AbstractFeatureExtractor
139
142
  raise "overwrite me"
140
143
  end
141
144
 
142
- # At this place, we had abstract methods for "training" phase 2 features
145
+ # At this place, we had abstract methods for "training" phase 2 features
143
146
  # Since this involves introducing a "state" that is nontrivial to preserve
144
147
  # for a standalone version of the classifiers, without keeping the training data,
145
148
  # we decided to remove this functionality (30.11.05).
@@ -149,94 +152,16 @@ class AbstractFeatureExtractor
149
152
  ######
150
153
  protected
151
154
 
152
- def AbstractFeatureExtractor.announce_me()
155
+ def AbstractFeatureExtractor.announce_me
153
156
  # AB: In 1.9 constants are symbols.
154
157
  if Module.constants.include?("RosyFeatureInfo") or Module.constants.include?(:RosyFeatureInfo)
155
158
  # yup, we have a class to which we can announce ourselves
156
- RosyFeatureInfo.add_feature(eval(self.name()))
159
+ RosyFeatureInfo.add_feature(self)
157
160
  else
158
161
  # no interface collector class
159
162
  # $stderr.puts "Feature #{self.name()} not announced: no RosyFeatureInfo."
160
163
  end
161
164
  end
162
165
  end
163
-
164
- ################################################################
165
- # Wrapper class for extractors that compute a single feature
166
- class AbstractSingleFeatureExtractor < AbstractFeatureExtractor
167
-
168
- ###
169
- # returns a string: the designator for this feature extractor
170
- # (an extractor may compute several features, but
171
- # in the experiment file it is chosen by a single designator)
172
- #
173
- # here: single feature, and the feature name is the designator
174
- def AbstractFeatureExtractor.designator()
175
- return eval(self.name()).feature_name()
176
- end
177
-
178
- ###
179
- def AbstractSingleFeatureExtractor.feature_names()
180
- return [eval(self.name()).feature_name()]
181
- end
182
-
183
- ###
184
- def compute_features()
185
- return [compute_feature()]
186
- end
187
-
188
- def compute_features_on_view(view) # DBView object
189
- return [compute_feature_on_view(view)]
190
- end
191
-
192
-
193
- ######
194
- # Single-feature methods
195
-
196
- ###
197
- def AbstractSingleFeatureExtractor.feature_name()
198
- raise "Overwrite me."
199
- end
200
-
201
- ###
202
- def compute_feature()
203
- raise "Overwrite me"
204
- end
205
-
206
- ###
207
- def compute_feature_on_view(view) # DBView object
208
- raise "Overwrite me"
209
- end
210
166
  end
211
-
212
- ######################################################
213
-
214
- class ExternalFeatureExtractor < AbstractFeatureExtractor
215
-
216
- @@warning_uttered = false
217
-
218
- ####
219
- # initialization:
220
- #
221
- # read experiment file for external interfaces
222
- def initialize(exp, # RosyConfigData object
223
- interpreter_class)
224
-
225
- @exp_rosy = exp
226
- @@interpreter_class = interpreter_class
227
-
228
- unless @exp_rosy.get("external_descr_file")
229
- unless @@warning_uttered
230
- $stderr.puts "Warning: Cannot compute external feature"
231
- $stderr.puts "since 'external_descr_file' has not been set"
232
- $stderr.puts "in the Rosy experiment file."
233
- @@warning_uttered = true
234
- end
235
-
236
- @exp_external = nil
237
- return
238
- end
239
-
240
- @exp_external = ExternalConfigData.new(@exp_rosy.get("external_descr_file"))
241
- end
242
167
  end
@@ -0,0 +1,52 @@
1
+ require_relative 'abstract_feature_extractor'
2
+
3
+ module Shalmaneser
4
+ module Rosy
5
+ ################################################################
6
+ # Wrapper class for extractors that compute a single feature
7
+ class AbstractSingleFeatureExtractor < AbstractFeatureExtractor
8
+
9
+ ###
10
+ # returns a string: the designator for this feature extractor
11
+ # (an extractor may compute several features, but
12
+ # in the experiment file it is chosen by a single designator)
13
+ #
14
+ # here: single feature, and the feature name is the designator
15
+ def self.designator
16
+ feature_name
17
+ end
18
+
19
+ ###
20
+ def self.feature_names
21
+ [feature_name]
22
+ end
23
+
24
+ ###
25
+ def compute_features
26
+ [compute_feature]
27
+ end
28
+
29
+ def compute_features_on_view(view) # DBView object
30
+ [compute_feature_on_view(view)]
31
+ end
32
+
33
+ ######
34
+ # Single-feature methods
35
+
36
+ ###
37
+ def self.feature_name
38
+ raise "Overwrite me."
39
+ end
40
+
41
+ ###
42
+ def compute_feature
43
+ raise "Overwrite me"
44
+ end
45
+
46
+ ###
47
+ def compute_feature_on_view(view) # DBView object
48
+ raise "Overwrite me"
49
+ end
50
+ end
51
+ end
52
+ end
@@ -0,0 +1,35 @@
1
+ require_relative 'abstract_feature_extractor'
2
+ require 'configuration/external_config_data'
3
+
4
+ module Shalmaneser
5
+ module Rosy
6
+ class ExternalFeatureExtractor < AbstractFeatureExtractor
7
+
8
+ @@warning_uttered = false
9
+
10
+ ####
11
+ # initialization:
12
+ #
13
+ # read experiment file for external interfaces
14
+ # @param [RosyConfigData] exp object
15
+ def initialize(exp, interpreter_class)
16
+ @exp_rosy = exp
17
+ @@interpreter_class = interpreter_class
18
+
19
+ unless @exp_rosy.get("external_descr_file")
20
+ unless @@warning_uttered
21
+ $stderr.puts "Warning: Cannot compute external feature"
22
+ $stderr.puts "since 'external_descr_file' has not been set"
23
+ $stderr.puts "in the Rosy experiment file."
24
+ @@warning_uttered = true
25
+ end
26
+
27
+ @exp_external = nil
28
+ return
29
+ end
30
+
31
+ @exp_external = Shalmaneser::Configuration::ExternalConfigData.new(@exp_rosy.get("external_descr_file"))
32
+ end
33
+ end
34
+ end
35
+ end
@@ -1,148 +1,179 @@
1
1
  # -*- coding: utf-8 -*-
2
2
 
3
3
  require 'getoptlong'
4
+ require 'definitions'
5
+ require 'configuration/rosy_config_data'
6
+
7
+ module Shalmaneser
8
+ module Rosy
9
+ class OptParser
10
+ def self.parse(cmd_args)
11
+ ##############################
12
+ # main starts here
13
+ ##############################
14
+
15
+ ##
16
+ # evaluate runtime arguments
17
+
18
+ tasks = {
19
+ "featurize" => [
20
+ # test table ID, required for test, no default
21
+ ['--testID', '-i', GetoptLong::REQUIRED_ARGUMENT],
22
+ # set to featurize: 'train' or 'test', no default
23
+ ['--dataset', '-d', GetoptLong::REQUIRED_ARGUMENT],
24
+ # splitlog ID: if given, featurize this split. Cannot use both this and -d
25
+ ['--logID', '-l', GetoptLong::REQUIRED_ARGUMENT],
26
+ ['--append', '-A', GetoptLong::NO_ARGUMENT]
27
+ ],
28
+ "split" => [
29
+ # splitlog ID, required, no default
30
+ ['--logID', '-l', GetoptLong::REQUIRED_ARGUMENT],
31
+ # percentage training data, default: 90
32
+ ['--trainpercent', '-r', GetoptLong::REQUIRED_ARGUMENT]
33
+ ],
34
+ "train" => [
35
+ # splitlog ID; if given, will train on split rather than all of main table
36
+ ['--logID', '-l', GetoptLong::REQUIRED_ARGUMENT],
37
+ # classification step: 'argrec', 'arglab', 'both' (default) or 'onestep'
38
+ ['--step', '-s', GetoptLong::REQUIRED_ARGUMENT]
39
+ ],
40
+ "test" => [
41
+ # classification step: 'argrec', 'arglab', 'both' (default) or 'onestep'
42
+ ['--step', '-s', GetoptLong::REQUIRED_ARGUMENT],
43
+ # test table ID: if given, test on this table
44
+ ['--testID', '-i', GetoptLong::REQUIRED_ARGUMENT],
45
+ # splitlog ID: if given, test on this split. Cannot use both this and -i
46
+ ['--logID', '-l', GetoptLong::REQUIRED_ARGUMENT],
47
+ # set this to prevent output of disambiguated test data
48
+ ['--nooutput', '-N', GetoptLong::NO_ARGUMENT]
49
+ ],
50
+ "eval" => [
51
+ # classification step: 'argrec', 'arglab', 'both' (default) or 'onestep'
52
+ ['--step', '-s', GetoptLong::REQUIRED_ARGUMENT],
53
+ # test table ID: if given, test on this table
54
+ ['--testID', '-i', GetoptLong::REQUIRED_ARGUMENT],
55
+ ['--logID', '-l', GetoptLong::REQUIRED_ARGUMENT]
56
+ ],
57
+ "inspect" => [
58
+ # describe all tables
59
+ ['--tables', GetoptLong::NO_ARGUMENT],
60
+ # describe table contents for current experiment
61
+ ['--tablecont', GetoptLong::OPTIONAL_ARGUMENT],
62
+ # test table ID: if given, describe contents of this table
63
+ ['--testID', '-i', GetoptLong::REQUIRED_ARGUMENT],
64
+ # describe classification runs for current experiment
65
+ ['--runs', GetoptLong::NO_ARGUMENT],
66
+ # list sentence IDs for given splitlog
67
+ ['--split', GetoptLong::REQUIRED_ARGUMENT]
68
+ ],
69
+ "services" => [
70
+ # delete database table
71
+ ['--deltable', GetoptLong::REQUIRED_ARGUMENT],
72
+ # delete experiment tables and files
73
+ ['--delexp', GetoptLong::NO_ARGUMENT],
74
+ # delete tables interactively
75
+ ['--deltables', GetoptLong::NO_ARGUMENT],
76
+ # delete runs
77
+ ['--delruns', GetoptLong::NO_ARGUMENT],
78
+ # delete split
79
+ ['--delsplit', GetoptLong::REQUIRED_ARGUMENT],
80
+ # dump experiment to files
81
+ ['--dump', GetoptLong::OPTIONAL_ARGUMENT],
82
+ # load experiment from files
83
+ ['--load', GetoptLong::OPTIONAL_ARGUMENT],
84
+ # write feature files
85
+ ['--writefeatures', GetoptLong::OPTIONAL_ARGUMENT],
86
+ # classification step: 'argrec', 'arglab', 'both' (default) or 'onestep'
87
+ ['--step', '-s', GetoptLong::REQUIRED_ARGUMENT],
88
+ # test table ID: if given, test on this table
89
+ ['--testID', '-i', GetoptLong::REQUIRED_ARGUMENT],
90
+ # splitlog ID: if given, test on this split. Cannot use both this and -i
91
+ ['--logID', '-l', GetoptLong::REQUIRED_ARGUMENT]
92
+ ]
93
+ }
94
+
95
+ optnames = [
96
+ # get help
97
+ ['--help', '-h', GetoptLong::NO_ARGUMENT],
98
+ # experiment file name (and path), no default
99
+ ['--expfile', '-e', GetoptLong::REQUIRED_ARGUMENT],
100
+ # task to perform: one of tasks.keys, no default
101
+ ['--task', '-t', GetoptLong::REQUIRED_ARGUMENT]
102
+ ]
103
+
104
+ tasks.values.each { |more_optnames| optnames.concat more_optnames }
105
+
106
+ optnames.uniq!
107
+
108
+ begin
109
+ opts = GetoptLong.new(*optnames)
110
+ rescue => e
111
+ $stderr.puts "Error: unknown command line option: #{e.message}!"
112
+ exit 1
113
+ end
4
114
 
5
- require 'rosy/rosy_config_data'
6
-
7
- module Rosy
8
-
9
- class OptParser
10
- def self.parse(cmd_args)
11
-
12
- ##############################
13
- # main starts here
14
- ##############################
15
-
16
- ##
17
- # evaluate runtime arguments
18
-
19
- tasks = {
20
- "featurize" => [ [ '--testID', '-i', GetoptLong::REQUIRED_ARGUMENT], # test table ID, required for test, no default
21
- [ '--dataset', '-d', GetoptLong::REQUIRED_ARGUMENT], # set to featurize: 'train' or 'test', no default
22
- ['--logID', '-l', GetoptLong::REQUIRED_ARGUMENT], # splitlog ID: if given, featurize this split. Cannot use both this and -d
23
- ['--append', '-A', GetoptLong::NO_ARGUMENT]
24
- ],
25
- "split" => [ ['--logID', '-l', GetoptLong::REQUIRED_ARGUMENT], # splitlog ID, required, no default
26
- [ '--trainpercent', '-r', GetoptLong::REQUIRED_ARGUMENT] # percentage training data, default: 90
27
- ],
28
- "train" => [ ['--logID', '-l', GetoptLong::REQUIRED_ARGUMENT], # splitlog ID; if given, will train on split rather than all of main table
29
- ['--step', '-s', GetoptLong::REQUIRED_ARGUMENT] # classification step: 'argrec', 'arglab', 'both' (default) or 'onestep'
30
- ],
31
- "test" => [ ['--step', '-s', GetoptLong::REQUIRED_ARGUMENT], # classification step: 'argrec', 'arglab', 'both' (default) or 'onestep'
32
- [ '--testID', '-i', GetoptLong::REQUIRED_ARGUMENT], # test table ID: if given, test on this table
33
- ['--logID', '-l', GetoptLong::REQUIRED_ARGUMENT], # splitlog ID: if given, test on this split. Cannot use both this and -i
34
- [ '--nooutput', '-N', GetoptLong::NO_ARGUMENT] # set this to prevent output of disambiguated test data
35
- ],
36
- "eval" => [['--step', '-s', GetoptLong::REQUIRED_ARGUMENT], # classification step: 'argrec', 'arglab', 'both' (default) or 'onestep'
37
- [ '--testID', '-i', GetoptLong::REQUIRED_ARGUMENT], # test table ID: if given, test on this table
38
- ['--logID', '-l', GetoptLong::REQUIRED_ARGUMENT]
39
- ],
40
- "inspect" => [['--tables', GetoptLong::NO_ARGUMENT], # describe all tables
41
- [ '--tablecont', GetoptLong::OPTIONAL_ARGUMENT], # describe table contents for current experiment
42
- [ '--testID', '-i', GetoptLong::REQUIRED_ARGUMENT], # test table ID: if given, describe contents of this table
43
- [ '--runs', GetoptLong::NO_ARGUMENT], # describe classification runs for current experiment
44
- [ '--split', GetoptLong::REQUIRED_ARGUMENT] # list sentence IDs for given splitlog
45
- ],
46
- "services" => [['--deltable', GetoptLong::REQUIRED_ARGUMENT], # delete database table
47
- [ '--delexp', GetoptLong::NO_ARGUMENT], # delete experiment tables and files
48
- [ '--deltables', GetoptLong::NO_ARGUMENT], # delete tables interactively
49
- [ '--delruns', GetoptLong::NO_ARGUMENT], # delete runs
50
- [ '--delsplit', GetoptLong::REQUIRED_ARGUMENT], # delete split
51
- [ '--dump', GetoptLong::OPTIONAL_ARGUMENT], # dump experiment to files
52
- [ '--load', GetoptLong::OPTIONAL_ARGUMENT], # load experiment from files
53
- [ '--writefeatures', GetoptLong::OPTIONAL_ARGUMENT], # write feature files
54
- ['--step', '-s', GetoptLong::REQUIRED_ARGUMENT], # classification step: 'argrec', 'arglab', 'both' (default) or 'onestep'
55
- [ '--testID', '-i', GetoptLong::REQUIRED_ARGUMENT], # test table ID: if given, test on this table
56
- ['--logID', '-l', GetoptLong::REQUIRED_ARGUMENT] # splitlog ID: if given, test on this split. Cannot use both this and -i
57
- ]
58
- }
59
-
60
- optnames = [[ '--help', '-h', GetoptLong::NO_ARGUMENT], # get help
61
- [ '--expfile', '-e', GetoptLong::REQUIRED_ARGUMENT], # experiment file name (and path), no default
62
- [ '--task', '-t', GetoptLong::REQUIRED_ARGUMENT ] # task to perform: one of task.keys, no default
63
- ]
64
-
65
- tasks.values.each { |more_optnames|
66
- optnames.concat more_optnames
67
- }
68
-
69
- optnames.uniq!
70
-
71
- # asterisk: "explode" array into individual parameters
72
- begin
73
- opts = options_hash(GetoptLong.new(*optnames))
74
- rescue
75
- $stderr.puts "Error: unknown command line option: " + $!
76
- exit 1
77
- end
78
-
79
- experiment_filename = nil
80
-
81
- ##
82
- # are we being asked for help?
83
- if opts['--help']
84
- help()
85
- exit(0)
86
- end
87
-
88
- ##
89
- # now find the task
90
- task = opts['--task']
91
- # sanity checks for task
92
- if task.nil?
93
- help()
94
- exit(0)
95
- end
96
- unless tasks.keys.include? task
97
- $stderr.puts "Sorry, I don't know the task '#{task}'. Do 'ruby rosy.rb -h' for a list of tasks."
98
- exit 1
99
- end
100
-
101
- ##
102
- # now evaluate the rest of the options
103
- opts.each_pair { |opt,arg|
104
- case opt
105
- when '--help', '--task'
115
+ experiment_filename = nil
116
+
117
+ opts = options_hash(opts)
118
+ ##
119
+ # are we being asked for help?
120
+ # @todo work with the empty case
121
+ if opts['--help']
122
+ help
123
+ exit(0)
124
+ end
125
+
126
+ ##
127
+ # now find the task
128
+ task = opts['--task']
129
+ # sanity checks for task
130
+ if task.nil?
131
+ help
132
+ exit(0)
133
+ end
134
+
135
+ unless tasks.keys.include?(task)
136
+ $stderr.puts "Sorry, I don't know the task '#{task}'. Do 'rosy -h' for a list of tasks."
137
+ exit 1
138
+ end
139
+
140
+ ##
141
+ # now evaluate the rest of the options
142
+ opts.each_pair do |opt, arg|
143
+ case opt
144
+ when '--help', '--task'
106
145
  # we already handled this
107
- when '--expfile'
108
- experiment_filename = arg
109
- else
110
- # do we know this option?
111
- unless tasks[task].assoc(opt)
112
- $stderr.puts "Sorry, I don't know the option " + opt + " for task " + task
113
- $stderr.puts "Do 'ruby rosy.rb -h' for a list of tasks and options."
114
- exit 1
146
+ when '--expfile'
147
+ experiment_filename = arg
148
+ else
149
+ # do we know this option?
150
+ unless tasks[task].assoc(opt)
151
+ $stderr.puts "Sorry, I don't know the option " + opt + " for task " + task
152
+ $stderr.puts "Do 'ruby rosy.rb -h' for a list of tasks and options."
153
+ exit 1
154
+ end
115
155
  end
116
156
  end
117
- }
118
-
119
-
120
- if experiment_filename.nil?
121
- $stderr.puts "I need an experiment file name, option --expfile|-e"
122
- exit 1
123
- end
124
-
125
- ##
126
- # open config file
127
-
128
- exp = RosyConfigData.new(experiment_filename)
129
-
130
- # sanity checks
131
- unless exp.get("experiment_ID") =~ /^[A-Za-z0-9_]+$/
132
- $stderr.puts "Please choose an experiment ID consisting only of the letters A-Za-z0-9_."
133
- exit 1
157
+
158
+ # @todo Verify whether this case can occur; the restriction may already be enforced by the parser itself.
159
+ if experiment_filename.nil?
160
+ $stderr.puts "I need an experiment file name, option --expfile|-e"
161
+ exit 1
162
+ end
163
+
164
+ ##
165
+ # open config file
166
+
167
+ exp = ::Shalmaneser::Configuration::RosyConfigData.new(experiment_filename)
168
+
169
+ [exp, opts]
134
170
  end
135
-
136
- # enduser mode?
137
- $ENDUSER_MODE = exp.get("enduser_mode")
138
-
139
- [exp, opts]
140
- end
141
-
142
- private
143
- def self.help
144
- $stderr.puts "
145
- ROSY: semantic ROle assignment SYstem Version 0.2
171
+
172
+ private
173
+
174
+ def self.help
175
+ $stderr.puts "
176
+ ROSY: semantic ROle assignment SYstem, Version #{VERSION}
146
177
 
147
178
  Usage:
148
179
 
@@ -150,18 +181,18 @@ ruby rosy.rb --help|-h
150
181
 
151
182
  gets you this help text.
152
183
 
153
- ruby rosy.rb --task|-t featurize --expfile|-e <e>
154
- [--dataset|-d <d>] [--testID|-i <i>]
184
+ ruby rosy.rb --task|-t featurize --expfile|-e <e>
185
+ [--dataset|-d <d>] [--testID|-i <i>]
155
186
  [--logID|-l <l> ] [--append|-A]
156
187
  featurizes input data and stores it in a database.
157
188
  Enduser mode: dataset has to be 'test' (preset as default),
158
189
  no --append.
159
190
 
160
- --expfile <e> Use <e> as the experiment description and
191
+ --expfile <e> Use <e> as the experiment description and
161
192
  configuration file
162
193
 
163
- --dataset <d> Set to featurize: <d> is either 'train'
164
- (put data into main table) or 'test' (put data
194
+ --dataset <d> Set to featurize: <d> is either 'train'
195
+ (put data into main table) or 'test' (put data
165
196
  into separate test table with ID given using --testID)
166
197
  Use at least one of --logID, --dataset.
167
198
 
@@ -171,15 +202,15 @@ ruby rosy.rb --task|-t featurize --expfile|-e <e>
171
202
  Use at least one of --logID, --dataset.
172
203
 
173
204
  --testID <i> Use <i> as the ID for the table to store the test data.
174
- necessary only with '--dataset test'. default: #{default_test_ID()}.
205
+ necessary only with '--dataset test'. default: #{::Rosy.default_test_ID}.
175
206
 
176
- --append Do not overwrite previously computed features
207
+ --append Do not overwrite previously computed features
177
208
  for this experiment.
178
- Rather, append the new features
209
+ Rather, append the new features
179
210
  to the old featurization files.
180
211
  Default: overwrite
181
212
 
182
- ruby rosy.rb --task|-t split --expfile|-e <f> --logID|-l <l>
213
+ ruby rosy.rb --task|-t split --expfile|-e <f> --logID|-l <l>
183
214
  [--trainpercent|-r <r>]
184
215
  produces a new train/test split on the main table of the experiment.
185
216
  Not available in enduser mode.
@@ -188,7 +219,7 @@ ruby rosy.rb --task|-t split --expfile|-e <f> --logID|-l <l>
188
219
 
189
220
  --logID <l> Use <l> as the ID for storing this new split
190
221
 
191
- --trainpercent <r> Allocate <r> percent of the data as train,
222
+ --trainpercent <r> Allocate <r> percent of the data as train,
192
223
  and 100-<r> as test
193
224
  default: <r>=90
194
225
 
@@ -200,20 +231,20 @@ ruby rosy.rb --task|-t train --expfile|-e <f> [--step|-s <s>] [--logID|-l <l>]
200
231
  --expfile <f> Use <f> as the experiment description and configuration file
201
232
 
202
233
  --step <s> What kind of classifier(s) to train?
203
- <s>=argrec: argument recognition,
234
+ <s>=argrec: argument recognition,
204
235
  distinguish role from nonrole
205
- <s>=arglab: argument labeling, naming roles,
236
+ <s>=arglab: argument labeling, naming roles,
206
237
  builds on argrec
207
238
  <s>=both: first argrec, then arglab
208
239
  <s>=onestep: do argument labeling right away without
209
240
  prior filtering of non-arguments
210
241
  default: both
211
242
 
212
- --logID <l> If given, train on this split of the main table rather than
243
+ --logID <l> If given, train on this split of the main table rather than
213
244
  the whole main table
214
245
 
215
246
 
216
- ruby rosy.rb --task|-t test --expfile|-e <f> [--step|-s <s>]
247
+ ruby rosy.rb --task|-t test --expfile|-e <f> [--step|-s <s>]
217
248
  [--logID|-l <l> | --testID|-i <i>] [--nooutput|-N]
218
249
  apply classifier(s) on data from a test table, or a main table split
219
250
  Enduser mode: only -s both, -s onestep available. Cleanup: Database with
@@ -222,12 +253,12 @@ ruby rosy.rb --task|-t test --expfile|-e <f> [--step|-s <s>]
222
253
  --expfile <f> Use <f> as the experiment description and configuration file
223
254
 
224
255
  --step <s> What kind of classifier(s) to use for testing?
225
- <s>=argrec: argument recognition,
256
+ <s>=argrec: argument recognition,
226
257
  distinguish role from nonrole
227
- <s>=arglab: argument labeling, naming roles,
258
+ <s>=arglab: argument labeling, naming roles,
228
259
  builds on argrec
229
260
  <s>=both: first argrec, then arglab
230
- <s>=onestep: do argument labeling right away without
261
+ <s>=onestep: do argument labeling right away without
231
262
  prior filtering of non-arguments
232
263
  default: both
233
264
  --logID <l> If given, test on this split of the main table
@@ -235,31 +266,31 @@ ruby rosy.rb --task|-t test --expfile|-e <f> [--step|-s <s>]
235
266
  --testID <i> If given, test on this test table.
236
267
  (Use either this option or -l)
237
268
 
238
- --nooutput Do not produce an output of the disambiguated test data
269
+ --nooutput Do not produce an output of the disambiguated test data
239
270
  in SalsaTigerXML format. This is useful if you just want
240
271
  to evaluate the system.
241
272
  Default: output is produced.
242
273
 
243
274
 
244
- ruby rosy.rb --task|-t eval --expfile|-e <f> [--step|-s <s>]
245
- [--logID|-l <l> | --testID|-i <i>
275
+ ruby rosy.rb --task|-t eval --expfile|-e <f> [--step|-s <s>]
276
+ [--logID|-l <l> | --testID|-i <i>
246
277
  evaluate the classification results.
247
278
  Not available in enduser mode.
248
279
 
249
280
  --expfile <f> Use <f> as the experiment description and configuration file
250
281
 
251
282
  --step <s> Evaluate results of which classification step?
252
- <s>=argrec: argument recognition,
283
+ <s>=argrec: argument recognition,
253
284
  distinguish role from nonrole
254
- <s>=arglab: argument labeling, naming roles,
285
+ <s>=arglab: argument labeling, naming roles,
255
286
  builds on argrec
256
287
  <s>=both: first argrec, then arglab
257
- <s>=onestep: do argument labeling right away without
288
+ <s>=onestep: do argument labeling right away without
258
289
  prior filtering of non-arguments
259
290
  default: both
260
291
  Need not be given if --runID is given.
261
292
 
262
- --logID <l> If given, evaluate on the test data from this split of
293
+ --logID <l> If given, evaluate on the test data from this split of
263
294
  the main table.
264
295
  (use either this option or -i or -R)
265
296
 
@@ -267,29 +298,29 @@ ruby rosy.rb --task|-t eval --expfile|-e <f> [--step|-s <s>]
267
298
  (Use either this option or -l or -R)
268
299
 
269
300
 
270
- ruby rosy.rb --task|-t inspect --expfile|-e <f> [--tables] [--runs]
301
+ ruby rosy.rb --task|-t inspect --expfile|-e <f> [--tables] [--runs]
271
302
  [--tablecont [N]] [--testID|-i <i>] [--split <l>]
272
- inspect system-internal data, both global and pertaining to the current
303
+ inspect system-internal data, both global and pertaining to the current
273
304
  experiment.
274
- If no options are chosen, an overview of the current experiment
305
+ If no options are chosen, an overview of the current experiment
275
306
  is given.
276
307
 
277
- --expfile <f> Use <f> as the experiment description and
308
+ --expfile <f> Use <f> as the experiment description and
278
309
  configuration file
279
310
 
280
311
  --tables Lists all tables of the DB: table name,column names
281
312
 
282
- --tablecont [N|id:N] Lists the training instances (as feature vectors)
313
+ --tablecont [N|id:N] Lists the training instances (as feature vectors)
283
314
  of the current experiment.
284
315
  If test ID is given, test instances are listed as well.
285
316
  The optional argument may have one of two forms:
286
- - It may be a number N. Then only the N first lines
317
+ - It may be a number N. Then only the N first lines
287
318
  of each set are listed.
288
319
  - It may be a pair id:N. Then only the N first lines of
289
320
  the DB table with ID id are listed. To list all lines
290
321
  of a single DB table, use id:
291
322
 
292
- --testID <i> If given, --tablecont also lists the feature vectors for
323
+ --testID <i> If given, --tablecont also lists the feature vectors for
293
324
  this test table
294
325
 
295
326
  --runs List all classification runs of the current experiment
@@ -298,29 +329,29 @@ ruby rosy.rb --task|-t inspect --expfile|-e <f> [--tables] [--runs]
298
329
 
299
330
  ruby rosy.rb --task|-t services --expfile|-e <f> [--deltable <t>]
300
331
  [--delexp] [--dump [<D>]] [--load [<D>]] [--delrun <R>]
301
- [--delsplit <l>] [--writefeatures [<D>]]
302
- [--step|-s <s>] [--testID|-i <i>] [--logID|-l <l> ]
332
+ [--delsplit <l>] [--writefeatures [<D>]]
333
+ [--step|-s <s>] [--testID|-i <i>] [--logID|-l <l> ]
303
334
  diverse services.
304
335
  The --del* services are not available in enduser mode.
305
336
 
306
337
  --dump [<D>] Dump the database tables for the current experiment file.
307
338
  If a directory <D> is given, the tables are written there,
308
- otherwise they are written to
309
- data_dir/<experiment_ID>/tables, where data_dir is the
339
+ otherwise they are written to
340
+ data_dir/<experiment_ID>/tables, where data_dir is the
310
341
  data directory given in the experiment file.
311
342
  No existing files in the directory are removed.
312
343
 
313
344
  --load [<D>] Construct new database tables from the files in
314
- the directory <D>, if it is given, otherwise from
315
- data_dir/<experiment_id>/tables, where data_dir
345
+ the directory <D>, if it is given, otherwise from
346
+ data_dir/<experiment_id>/tables, where data_dir
316
347
  is the data directory given in the experiment file.
317
- Warning: Database tables are loaded into the
348
+ Warning: Database tables are loaded into the
318
349
  current experiment, the one described in the
319
350
  experiment file. Existing data in tables with
320
351
  the same names is overwritten!
321
352
 
322
353
  --deltable <t> Remove database table <t>
323
-
354
+
324
355
  --deltables Presents all tables in the database for interactive deletion
325
356
 
326
357
  --delexp Remove the experiment described in the given experiment file,
@@ -337,14 +368,14 @@ ruby rosy.rb --task|-t services --expfile|-e <f> [--deltable <t>]
337
368
  system. If <D> is not given, feature files are written
338
369
  to data_dir/<experiment_id>/your_feature_files/.
339
370
 
340
- Uses the parameters --step, --testID, --logID to
371
+ Uses the parameters --step, --testID, --logID to
341
372
  determine which feature files will be written.
342
373
 
343
374
  --step <s> Use with --writefeatures: task for which to write features.
344
- <s>=argrec: argument recognition,
375
+ <s>=argrec: argument recognition,
345
376
  distinguish role from nonrole
346
377
  <s>=arglab: argument labeling, naming roles
347
- <s>=onestep: do argument labeling right away without
378
+ <s>=onestep: do argument labeling right away without
348
379
  prior filtering of non-arguments
349
380
  default: onestep.
350
381
 
@@ -352,28 +383,27 @@ ruby rosy.rb --task|-t services --expfile|-e <f> [--deltable <t>]
352
383
  for the split with ID <l>.
353
384
 
354
385
  --testID <i> Use with --writefeatures: write features
355
- for the test set with ID <i>.
356
- default: #{default_test_ID()}.
386
+ for the test set with ID <i>.
387
+ default: #{::Shalmaneser::Rosy.default_test_ID}.
357
388
  "
389
+ end
358
390
 
359
- end
360
-
361
- ###
362
- # options_hash:
363
- #
364
- # GetoptLong only allows you to access options via each(),
365
- # not individually, and it only allows you to cycle through the options once.
366
- # So we re-code the options as a hash
367
- def self.options_hash(opts_obj) # GetoptLong object
368
- opt_hash = Hash.new
369
-
370
- opts_obj.each do |opt, arg|
371
- opt_hash[opt] = arg
391
+ ###
392
+ # options_hash:
393
+ #
394
+ # GetoptLong only allows you to access options via each(),
395
+ # not individually, and it only allows you to cycle through the options once.
396
+ # So we re-code the options as a hash
397
+ def self.options_hash(opts_obj) # GetoptLong object
398
+ opt_hash = {}
399
+
400
+ opts_obj.each do |opt, arg|
401
+ opt_hash[opt] = arg
402
+ end
403
+
404
+ opt_hash
372
405
  end
373
-
374
- return opt_hash
375
- end
376
-
377
- end # class OptParser
378
-
379
- end # module Rosy
406
+
407
+ end # class OptParser
408
+ end # module Rosy
409
+ end