shalmaneser-lib 1.2.rc5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (70) hide show
  1. checksums.yaml +7 -0
  2. data/.yardopts +10 -0
  3. data/CHANGELOG.md +4 -0
  4. data/LICENSE.md +4 -0
  5. data/README.md +122 -0
  6. data/lib/configuration/config_data.rb +457 -0
  7. data/lib/configuration/config_format_element.rb +210 -0
  8. data/lib/configuration/configuration_error.rb +15 -0
  9. data/lib/configuration/external_config_data.rb +56 -0
  10. data/lib/configuration/frappe_config_data.rb +134 -0
  11. data/lib/configuration/fred_config_data.rb +199 -0
  12. data/lib/configuration/rosy_config_data.rb +126 -0
  13. data/lib/db/db_interface.rb +50 -0
  14. data/lib/db/db_mysql.rb +141 -0
  15. data/lib/db/db_sqlite.rb +280 -0
  16. data/lib/db/db_table.rb +237 -0
  17. data/lib/db/db_view.rb +416 -0
  18. data/lib/db/db_wrapper.rb +175 -0
  19. data/lib/db/select_table_and_columns.rb +10 -0
  20. data/lib/db/sql_query.rb +243 -0
  21. data/lib/definitions.rb +19 -0
  22. data/lib/eval.rb +482 -0
  23. data/lib/ext/maxent/Classify.class +0 -0
  24. data/lib/ext/maxent/Train.class +0 -0
  25. data/lib/external_systems.rb +251 -0
  26. data/lib/framenet_format/fn_corpus_aset.rb +209 -0
  27. data/lib/framenet_format/fn_corpus_xml_file.rb +120 -0
  28. data/lib/framenet_format/fn_corpus_xml_sentence.rb +299 -0
  29. data/lib/framenet_format/fn_database.rb +143 -0
  30. data/lib/framenet_format/frame_xml_file.rb +104 -0
  31. data/lib/framenet_format/frame_xml_sentence.rb +411 -0
  32. data/lib/logging.rb +25 -0
  33. data/lib/ml/classifier.rb +189 -0
  34. data/lib/ml/mallet.rb +236 -0
  35. data/lib/ml/maxent.rb +229 -0
  36. data/lib/ml/optimize.rb +195 -0
  37. data/lib/ml/timbl.rb +140 -0
  38. data/lib/monkey_patching/array.rb +82 -0
  39. data/lib/monkey_patching/enumerable_bool.rb +24 -0
  40. data/lib/monkey_patching/enumerable_distribute.rb +18 -0
  41. data/lib/monkey_patching/file.rb +131 -0
  42. data/lib/monkey_patching/subsumed.rb +24 -0
  43. data/lib/ruby_class_extensions.rb +4 -0
  44. data/lib/salsa_tiger_xml/corpus.rb +24 -0
  45. data/lib/salsa_tiger_xml/fe_node.rb +98 -0
  46. data/lib/salsa_tiger_xml/file_parts_parser.rb +214 -0
  47. data/lib/salsa_tiger_xml/frame_node.rb +145 -0
  48. data/lib/salsa_tiger_xml/graph_node.rb +347 -0
  49. data/lib/salsa_tiger_xml/reg_xml.rb +285 -0
  50. data/lib/salsa_tiger_xml/salsa_tiger_sentence.rb +596 -0
  51. data/lib/salsa_tiger_xml/salsa_tiger_sentence_graph.rb +333 -0
  52. data/lib/salsa_tiger_xml/salsa_tiger_sentence_sem.rb +438 -0
  53. data/lib/salsa_tiger_xml/salsa_tiger_xml_helper.rb +84 -0
  54. data/lib/salsa_tiger_xml/salsa_tiger_xml_node.rb +161 -0
  55. data/lib/salsa_tiger_xml/sem_node.rb +58 -0
  56. data/lib/salsa_tiger_xml/string_terminals_in_right_order.rb +192 -0
  57. data/lib/salsa_tiger_xml/syn_node.rb +169 -0
  58. data/lib/salsa_tiger_xml/tree_node.rb +59 -0
  59. data/lib/salsa_tiger_xml/ts_syn_node.rb +47 -0
  60. data/lib/salsa_tiger_xml/usp_node.rb +72 -0
  61. data/lib/salsa_tiger_xml/xml_node.rb +163 -0
  62. data/lib/shalmaneser/lib.rb +1 -0
  63. data/lib/tabular_format/fn_tab_format_file.rb +38 -0
  64. data/lib/tabular_format/fn_tab_frame.rb +67 -0
  65. data/lib/tabular_format/fn_tab_sentence.rb +169 -0
  66. data/lib/tabular_format/tab_format_file.rb +91 -0
  67. data/lib/tabular_format/tab_format_named_args.rb +184 -0
  68. data/lib/tabular_format/tab_format_sentence.rb +119 -0
  69. data/lib/value_restriction.rb +49 -0
  70. metadata +131 -0
@@ -0,0 +1,210 @@
1
+ ##############################
2
+ # ConfigFormatElement is an auxiliary class
3
+ # of ConfigData.
4
+ # It keeps track of feature patterns with variables in them
5
+ # that can be instantiated.
6
+ # @author Andrei Beliankou
7
+ #
8
+
9
+ require_relative 'configuration_error'
10
+
11
+ module Shalmaneser
12
+ module Configuration
13
+ class ConfigFormatElement
14
+
15
+ # given a pattern and a list of variable names,
16
+ # analyze the pattern and remember the variable names
17
+ #
18
+ def initialize(string, # string: feature name, may include names of variables.
19
+ # they are included in <>
20
+ variables) # list of variable names that can occur
21
+
22
+ @variables = variables
23
+
24
+ # pattern: this is what the 'string' is split into,
25
+ # an array of elements that are either fixed parts or variables.
26
+ # fixed part: pair [item:string, "string"]
27
+ # variable: pair [variable_name:string, "variable"]
28
+ @pattern = []
29
+ state = "out"
30
+ item = ""
31
+
32
+ # analyze string,
33
+ # split into variables and fixed parts
34
+ string.split(//).each { |char|
35
+ case state
36
+ when "in"
37
+ case char
38
+ when "<"
39
+ raise ConfigurationError, "Duplicate < in #{string}."
40
+ when ">"
41
+ unless @variables.include? item
42
+ raise ConfigurationError, "Unknown variable #{item}."
43
+ end
44
+ @pattern << [item, "variable"]
45
+ item = ""
46
+ state = "out"
47
+ else
48
+ item << char
49
+ state = "in"
50
+ end
51
+
52
+ when "out"
53
+ case char
54
+ when "<"
55
+ unless item.empty?
56
+ @pattern << [item, "string"]
57
+ item = ""
58
+ end
59
+ state = "in"
60
+ when ">"
61
+ raise ConfigurationError, "Unexpected > in #{string}."
62
+ else
63
+ item << char
64
+ state = "out"
65
+ end
66
+
67
+ else
68
+ raise ConfigurationError, "Shouldn't be here!"
69
+ end
70
+ }
71
+
72
+ # read through the whole of "string"
73
+ # end state has to be "out"
74
+ unless state == "out"
75
+ raise ConfigurationError, "Unclosed < in #{string}."
76
+ end
77
+
78
+ # last bit still to be recorded?
79
+ unless item.empty?
80
+ @pattern << [item, "string"]
81
+ end
82
+
83
+ # make regexp for matching this pattern
84
+ @regexp = make_regexp(@pattern)
85
+ end
86
+
87
+ # instantiate: given pairs of variable names and variable values,
88
+ # instantiate @pattern to a string in which var names are replaced
89
+ # by their values
90
+ #
91
+ # returns: string
92
+ def instantiate(var_hash) # hash variable name(string) => variable value(string)
93
+ # instantiate the pattern
94
+ @pattern.map do |item, string_or_var|
95
+ case string_or_var
96
+ when "string"
97
+ item
98
+ when "variable"
99
+ if var_hash[item].nil?
100
+ raise ConfigurationError, "Missing variable instantiation: #{item}."
101
+ end
102
+ var_hash[item]
103
+ else
104
+ raise ConfigurationError, "Shouldn't be here!"
105
+ end
106
+ end.join
107
+ end
108
+
109
+ # match()
110
+ #
111
+ # given a string, try to match it against the @pattern
112
+ # while setting the variables given in 'fillers' to
113
+ # the values given in that hash.
114
+ #
115
+ # returns: if the string matches, a hash variable name => value
116
+ # that includes the fillers given as a parameter as well as
117
+ # values for all other variables mentioned in @pattern,
118
+ # or false if no match.
119
+ def match(string, # a string
120
+ fillers = nil) # hash variable name(string) => value(string)
121
+
122
+ # have we been given partial info about variables?
123
+ if fillers
124
+ match = make_regexp(@pattern, fillers).match(string)
125
+ else
126
+ match = @regexp.match(string)
127
+ end
128
+
129
+ if match.nil?
130
+ # no match via the regular expression
131
+ return false
132
+ end
133
+
134
+ # regular expression matched.
135
+ # construct return value in hash
136
+ # retv: variable name(string) => value(string)
137
+ retv = {}
138
+ if fillers
139
+ # include given fillers in retv hash
140
+ fillers.each_pair { |name, val| retv[name] = val }
141
+ end
142
+
143
+ # now put values for other variables in @pattern into retv
144
+ index = 1
145
+ @pattern.to_a.select { |item, string_or_var|
146
+ string_or_var == "variable"
147
+ }.select { |item, string_or_var|
148
+ fillers.nil? or
149
+ fillers[item].nil?
150
+ }.each { |item, string_or_var|
151
+ # for all items on the pattern list
152
+ # that are variables and
153
+ # haven't been filled by the "fillers" list already:
154
+ # fill from matches
155
+
156
+ if match[index].nil?
157
+ raise ConfigurationError, "Match, but not enough matched elements? Strange."
158
+ end
159
+
160
+ if retv[item].nil?
161
+ retv[item] = match[index]
162
+ else
163
+ unless retv[item] == match[index]
164
+ return false
165
+ end
166
+ end
167
+
168
+ index += 1
169
+ }
170
+
171
+ retv
172
+ end
173
+
174
+ # used_variables
175
+ #
176
+ # returns: an array of variable names used in @pattern
177
+ def used_variables
178
+ @pattern.select do |_item, string_or_var|
179
+ string_or_var == "variable"
180
+ end.map { |item, _string_or_var| item }
181
+ end
182
+
183
+ ####################
184
+ private
185
+
186
+ # make_regexp:
187
+ # make regular expression from a pattern
188
+ # together with some variable fillers
189
+ #
190
+ # @return [Regexp] object
191
+ # @param [Array] pattern An array of pairs [string, "string"] or [string, "variable"]
192
+ # @param [Hash] fillers A Hash variable name(string) => value(string)
193
+ def make_regexp(pattern, fillers = nil)
194
+ pattern = pattern.map do |item, string_or_var|
195
+ case string_or_var
196
+ when "variable"
197
+ fillers && fillers[item] ? Regexp.escape(fillers[item]) : '(.+)'
198
+ when "string"
199
+ Regexp.escape(item)
200
+ else
201
+ # @todo Find the source of this error.
202
+ raise ConfiguratinError, "Shouldn't be here"
203
+ end
204
+ end.join
205
+
206
+ Regexp.new("^#{pattern}$")
207
+ end
208
+ end
209
+ end
210
+ end
@@ -0,0 +1,15 @@
1
module Shalmaneser
  module Configuration
    # Error raised for problems with configuration / experiment files.
    class ConfigurationError < StandardError
      # Builds the error message. If a nested exception is given, its class
      # and message are put on the first line, followed by +msg+ on the next
      # line when +msg+ is present.
      #
      # @param msg [String, nil] A custom message for this exception.
      # @param nested_exception [Exception, nil] An external exception
      #   which is reused to provide more information.
      def initialize(msg = nil, nested_exception = nil)
        if nested_exception
          header = "#{nested_exception.class}: #{nested_exception.message}"
          # Append the newline and custom message only when there is one,
          # so a missing message does not leave a trailing newline.
          msg = msg.nil? ? header : "#{header}\n#{msg}"
        end
        super(msg)
      end
    end
  end
end
@@ -0,0 +1,56 @@
1
+ # ExternalConfigData
2
+ # Katrin Erk January 2006
3
+ #
4
+ # All scripts that compute additional external knowledge sources
5
+ # for Fred and Rosy:
6
+ # access to configuration and experiment description file
7
+
8
+ require_relative 'config_data'
9
+
10
+ ##############################
11
+ # Class ExternalConfigData
12
+ #
13
+ # inherits from ConfigData,
14
+ # sets variable names appropriate to tasks of external knowledge sources
15
+ module Shalmaneser
16
+ module Configuration
17
+ class ExternalConfigData < ConfigData
18
+ def initialize(filename)
19
+ # initialize config data object
20
+ super(filename, # config file
21
+ { "directory" => "string", # features
22
+
23
+ "experiment_id" => "string",
24
+
25
+ "gfmap_restrict_to_downpath" => "bool",
26
+ "gfmap_restrict_pathlen" => "integer",
27
+ "gfmap_remove_gf" => "list"
28
+ },
29
+ [] # variables
30
+ )
31
+
32
+ # set access functions for list features
33
+ set_list_feature_access("gfmap_remove_gf",
34
+ method("access_as_stringlist"))
35
+ end
36
+
37
+ ###
38
+ protected
39
+ #####
40
+ # access_as_stringlist
41
+ #
42
+ # assumed format:
43
+ #
44
+ # lhs = rhs1 rhs2 ... rhsN
45
+ #
46
+ # given in val_list as string tuples [rhs1,...,rhsN]
47
+ #
48
+ # join the rhs strings by spaces, return as string
49
+ # "rhs1 rhs2 ... rhsN"
50
+ #
51
+ def access_as_stringlist(val_list) # array:array:string
52
+ val_list.map { |rhs| rhs.join(" ") }
53
+ end
54
+ end
55
+ end
56
+ end
@@ -0,0 +1,134 @@
1
+ # FrappeConfigData
2
+ # Katrin Erk July 05
3
+ #
4
+ # Preprocessing for Fred and Rosy:
5
+ # access to a configuration and experiment description file
6
+
7
+ require_relative 'config_data'
8
+
9
+ ##############################
10
+ # Class FrappeConfigData
11
+ #
12
+ # inherits from ConfigData,
13
+ # sets variable names appropriate to preprocessing task
14
+ module Shalmaneser
15
+ module Configuration
16
+ class FrappeConfigData < ConfigData
17
+ VALID_ENCODINGS = ['hex', 'iso', 'utf8', nil]
18
+ VALID_INPUT_FORMATS = %w(Plain SalsaTab FNXml FNCorpusXml SalsaTigerXML)
19
+ CONFIG_DEFS = {
20
+ "prep_experiment_ID" => "string", # experiment identifier
21
+ "frprep_directory" => "string", # dir for frprep internal data
22
+ # information about the dataset
23
+ "language" => "string", # en, de
24
+ "origin" => "string", # FrameNet, Salsa, or nothing
25
+ "format" => "string", # Plain, SalsaTab, FNXml, FNCorpusXml, SalsaTigerXML
26
+ "encoding" => "string", # utf8, iso, hex, or nothing
27
+
28
+ # directories
29
+ "directory_input" => "string", # dir with input data
30
+ "directory_preprocessed" => "string", # dir with output Salsa/Tiger XML data
31
+ "directory_parserout" => "string", # dir with parser output for the parser named below
32
+
33
+ # syntactic processing
34
+ "pos_tagger" => "string", # name of POS tagger
35
+ "lemmatizer" => "string", # name of lemmatizer
36
+ "parser" => "string", # name of parser
37
+ "pos_tagger_path" => "string", # path to POS tagger
38
+ "lemmatizer_path" => "string", # path to lemmatizer
39
+ "parser_path" => "string", # path to parser
40
+ "parser_max_sent_num" => "integer", # max number of sentences per parser input file
41
+ "parser_max_sent_len" => "integer", # max sentence length the parser handles
42
+
43
+ "do_parse" => "bool", # use parser?
44
+ "do_lemmatize" => "bool",# use lemmatizer?
45
+ "do_postag" => "bool", # use POS tagger?
46
+
47
+ # output format: if tabformat_output == true,
48
+ # output in Tab format rather than Salsa/Tiger XML
49
+ # (this will not work if do_parse == true)
50
+ "tabformat_output" => "bool",
51
+
52
+ # syntactic repairs, dependent on existing semantic role annotation
53
+ "fe_syn_repair" => "bool", # map words to constituents for FEs: idealize?
54
+ "fe_rel_repair" => "bool", # FEs: include non-included relative clauses into FEs
55
+ }
56
+
57
+ # @param filename [String]
58
+ def initialize(filename)
59
+ # @param filename [String] path to a config file
60
+ # @param CONFIG_DEFS [Hash] a list of configuration definitions
61
+ super(filename, CONFIG_DEFS, [])
62
+ validate
63
+ end
64
+
65
+ # @return [True, False]
66
+ # Shall we convert our input files into the target encoding?
67
+ def convert_encoding?
68
+ get('encoding') != 'utf8'
69
+ end
70
+
71
+ private
72
+
73
+ # Validates semantically the input values from the experiment file.
74
+ # @todo Rework the whole validation engine, the parameter definitions
75
+ # should entail the information about: optional, obligatory,
76
+ # in combination with. This information should be stored in external
77
+ # resource files to easily change them.
78
+ # @todo Accumulate error messages.
79
+ def validate
80
+ msg = []
81
+
82
+ unless get('frprep_directory')
83
+ msg << 'Please set <frprep_directory>, the Frappe internal data '\
84
+ 'directory, in the experiment file.'
85
+ end
86
+
87
+ unless get('directory_input')
88
+ msg << 'Please specify <directory_input> in the Frappe experiment file.'
89
+ end
90
+
91
+ unless get('directory_preprocessed')
92
+ msg << 'Please specify <directory_preprocessed> in the experiment file.'
93
+ end
94
+
95
+ # sanity check: output in tab format will not work
96
+ # if we also do a parse
97
+ if get('tabformat_output') && get('do_parse')
98
+ msg << 'Error: Cannot do Tab format output when the input text is being'\
99
+ 'parsed. Please set either <tabformat_output> or <do_parse> to false.'
100
+ end
101
+
102
+ if get('do_postag') && !(get('pos_tagger_path') && get('pos_tagger'))
103
+ msg << 'POS Tagging: I need <pos_tagger> and <pos_tagger_path> '\
104
+ 'in the experiment file.'
105
+ end
106
+
107
+ if get('do_lemmatize') && !(get('lemmatizer_path') && get('lemmatizer'))
108
+ msg << 'Lemmatization: I need <lemmatizer> and <lemmatizer_path> in the experiment file.'
109
+ end
110
+
111
+ if get('do_parse') && !(get('parser_path') && get('parser'))
112
+ msg << 'Parsing: I need <parser> and <parser_path> in the experiment file.'
113
+ end
114
+
115
+ unless VALID_ENCODINGS.include?(get('encoding'))
116
+ msg << 'Please define a correct encoding in the configuration file: '\
117
+ "<#{VALID_ENCODINGS.join('>, <')}>!"
118
+ end
119
+
120
+ unless VALID_INPUT_FORMATS.include?(get('format'))
121
+ msg << 'Please define a correct input format in the configuration file: '\
122
+ "<#{VALID_INPUT_FORMATS.join('>, <')}>!"
123
+ end
124
+
125
+ unless get("prep_experiment_ID") =~ /^[A-Za-z0-9_]+$/
126
+ msg << 'Please choose an alphanumeric experiment ID! '\
127
+ "You provided: #{get('prep_experiment_ID')}"
128
+ end
129
+
130
+ raise(ConfigurationError, msg.join("\n")) if msg.any?
131
+ end
132
+ end
133
+ end
134
+ end
@@ -0,0 +1,199 @@
1
+ # FredConfigData
2
+ # Katrin Erk April 05
3
+ #
4
+ # Frame disambiguation system:
5
+ # access to a configuration and experiment description file
6
+
7
+ require_relative 'config_data'
8
+ require 'definitions'
9
+ require 'logging'
10
+
11
+ ##############################
12
+ # Class FredConfigData
13
+ #
14
+ # inherits from ConfigData,
15
+ # sets variable names appropriate to WSD task
16
+ module Shalmaneser
17
+ module Configuration
18
+ class FredConfigData < ConfigData
19
+ VALID_TASKS = %w(featurize refeaturize split test eval)
20
+ CONFIG_DEFS = {
21
+ "experiment_ID" => "string", # experiment ID
22
+ "preproc_descr_file_train" => "string", # path to preprocessing files
23
+ "preproc_descr_file_test" => "string",
24
+ "directory_output" => "string", # path to Salsa/Tiger XML output directory
25
+
26
+ # @todo Verbosity should be handled by the Logger and only via cmd switches.
27
+ "verbose" => "bool", # print diagnostic messages?
28
+ "apply_to_all_known_targets" => "bool", # apply to all known targets rather than the ones with a frame?
29
+
30
+ "fred_directory" => "string",# directory for internal info
31
+ "classifier_dir" => "string", # write classifiers here
32
+
33
+ "classifier" => "list", # classifiers
34
+
35
+ "dbtype" => "string", # "mysql" or "sqlite"
36
+
37
+ "host" => "string", # DB access: sqlite only
38
+ "user" => "string",
39
+ "passwd" => "string",
40
+ "dbname" => "string",
41
+
42
+ # featurization info
43
+ "feature" => "list", # which features to use for the classifier?
44
+ "binary_classifiers" => "bool",# make binary rather than n-ary clasifiers?
45
+ "negsense" => "string", # binary classifier: negative sense is..?
46
+ "numerical_features" => "string", # do what with numerical features?
47
+
48
+ # what to do with items that have multiple senses?
49
+ # 'binarize': binary classifiers, and consider positive
50
+ # if the sense is among the gold senses
51
+ # 'join' : make one joint sense
52
+ # 'repeat' : make multiple occurrences of the item, one sense per occ
53
+ # 'keep' : keep as separate labels
54
+ #
55
+ # multilabel: consider as assigned all labels
56
+ # above a certain confidence threshold?
57
+ "handle_multilabel" => "string",
58
+ "assignment_confidence_threshold" => "float",
59
+
60
+ # single-sentence context?
61
+ "single_sent_context" => "bool",
62
+
63
+ # noncontiguous input? then we need access to a larger corpus
64
+ "noncontiguous_input" => "bool",
65
+ "larger_corpus_dir" => "string",
66
+ "larger_corpus_format" => "string",
67
+ "larger_corpus_encoding" => "string",
68
+
69
+ # Imported from PrepConfigData
70
+ 'do_postag' => 'bool',
71
+ 'do_lemmatize' => 'bool',
72
+ 'do_parse' => 'bool',
73
+ 'pos_tagger' => 'string',
74
+ 'lemmatizer' => 'string',
75
+ 'parser' => 'string',
76
+ 'directory_preprocessed' => 'string',
77
+ 'language' => 'string'
78
+ }
79
+
80
+ def initialize(filename)
81
+ super(filename, CONFIG_DEFS, ["train", "exp_ID"])
82
+ # set access functions for list features
83
+ set_list_feature_access("classifier", method("access_classifier"))
84
+ set_list_feature_access("feature", method("access_feature"))
85
+ validate
86
+ end
87
+
88
+ ###
89
+ # protected
90
+
91
+ #####
92
+ # access_feature
93
+ #
94
+ # access function for feature 'feature'
95
+ #
96
+ # assumed format:
97
+ #
98
+ # feature = context 50
99
+ # feature = context 2
100
+ # feature = syn
101
+ #
102
+ # i.e. first the name of the feature type to use, then
103
+ # optionally a parameter,
104
+ # and the same feature can occur more than once (which makes sense
105
+ # only in case of parameters)
106
+ #
107
+ #
108
+ # returns:
109
+ # - If a feature is given as a parameter,
110
+ # - If the feature is not set in the experiment file, nil
111
+ # - If the feature is set and has a parameter, the list of
112
+ # parameter values set for it. It is assumed that the parameters
113
+ # are integers, and they are returned as integers
114
+ # - If the feature is set and has no parameter, true
115
+ # - If no feature is given as parameter:
116
+ # a list of all features that have been set in the experiment file
117
+ # Each feature is given as a tuple: the first element is the feature (a string),
118
+ # all further elements are options (integers)
119
+ def access_feature(val_list, # array:array:string: list of tuples defined in config file
120
+ # for feature 'feature'
121
+ feature=nil) # string: feature type name
122
+
123
+ if feature
124
+ # access options for this feature
125
+
126
+ # get the right tuples
127
+ positives = val_list.select { |entries|
128
+ entries.first == feature
129
+ }.map { |entries|
130
+ entries[1]
131
+ }
132
+
133
+ if positives.empty?
134
+ # feature not defined
135
+ return nil
136
+
137
+ elsif positives.compact.empty?
138
+ # feature defined, but no parameters
139
+ return true
140
+
141
+ else
142
+ # feature defined, and has values
143
+ return positives.map { |par| par.to_i }
144
+ end
145
+
146
+ else
147
+ # return all features that have been set
148
+ return val_list.map { |feature_name, *options|
149
+ [feature_name] + options.map { |o| o.to_i }
150
+ }
151
+ end
152
+ end
153
+
154
+ #####
155
+ # access_classifier
156
+ #
157
+ # access function for feature 'classifier'
158
+ #
159
+ # assumed format in the config file:
160
+ #
161
+ # feature = path [option]*
162
+ #
163
+ # i.e. first the name of the feature type to use, then
164
+ # optionally options associated with that feature,
165
+ # e.g. 'argrec': use that feature only when computing argrec
166
+ #
167
+ # the access function is called with parameter val_list, an array of
168
+ # string tuples, one string tuple for each feature defined.
169
+ # the first string in the tuple is the feature name, the rest are the options
170
+ #
171
+ # returns: a list of pairs [feature_name(string), options(array:string)]
172
+ # of defined features
173
+ # @param val_list [Array] array:array:string: list of tuples defined
174
+ # in config file for feature 'feature'
175
+ def access_classifier(val_list)
176
+ if val_list.nil?
177
+ []
178
+ else
179
+ val_list.map do |cl_descr_tuple|
180
+ [cl_descr_tuple.first, cl_descr_tuple[1..-1]]
181
+ end
182
+ end
183
+ end
184
+
185
+ private
186
+
187
+ def validate
188
+ msg = []
189
+ =begin
190
+ unless VALID_TASKS.include?(get('encoding'))
191
+ msg << 'Please define a correct encoding in the configuration file: '\
192
+ "<#{VALID_ENCODINGS.join('>, <')}>!"
193
+ end
194
+ =end
195
+ raise(ConfigurationError, msg.join("\n")) if msg.any?
196
+ end
197
+ end
198
+ end
199
+ end