shalmaneser-lib 1.2.rc5

Sign up to get free protection for your applications and to get access to all the features.
Files changed (70) hide show
  1. checksums.yaml +7 -0
  2. data/.yardopts +10 -0
  3. data/CHANGELOG.md +4 -0
  4. data/LICENSE.md +4 -0
  5. data/README.md +122 -0
  6. data/lib/configuration/config_data.rb +457 -0
  7. data/lib/configuration/config_format_element.rb +210 -0
  8. data/lib/configuration/configuration_error.rb +15 -0
  9. data/lib/configuration/external_config_data.rb +56 -0
  10. data/lib/configuration/frappe_config_data.rb +134 -0
  11. data/lib/configuration/fred_config_data.rb +199 -0
  12. data/lib/configuration/rosy_config_data.rb +126 -0
  13. data/lib/db/db_interface.rb +50 -0
  14. data/lib/db/db_mysql.rb +141 -0
  15. data/lib/db/db_sqlite.rb +280 -0
  16. data/lib/db/db_table.rb +237 -0
  17. data/lib/db/db_view.rb +416 -0
  18. data/lib/db/db_wrapper.rb +175 -0
  19. data/lib/db/select_table_and_columns.rb +10 -0
  20. data/lib/db/sql_query.rb +243 -0
  21. data/lib/definitions.rb +19 -0
  22. data/lib/eval.rb +482 -0
  23. data/lib/ext/maxent/Classify.class +0 -0
  24. data/lib/ext/maxent/Train.class +0 -0
  25. data/lib/external_systems.rb +251 -0
  26. data/lib/framenet_format/fn_corpus_aset.rb +209 -0
  27. data/lib/framenet_format/fn_corpus_xml_file.rb +120 -0
  28. data/lib/framenet_format/fn_corpus_xml_sentence.rb +299 -0
  29. data/lib/framenet_format/fn_database.rb +143 -0
  30. data/lib/framenet_format/frame_xml_file.rb +104 -0
  31. data/lib/framenet_format/frame_xml_sentence.rb +411 -0
  32. data/lib/logging.rb +25 -0
  33. data/lib/ml/classifier.rb +189 -0
  34. data/lib/ml/mallet.rb +236 -0
  35. data/lib/ml/maxent.rb +229 -0
  36. data/lib/ml/optimize.rb +195 -0
  37. data/lib/ml/timbl.rb +140 -0
  38. data/lib/monkey_patching/array.rb +82 -0
  39. data/lib/monkey_patching/enumerable_bool.rb +24 -0
  40. data/lib/monkey_patching/enumerable_distribute.rb +18 -0
  41. data/lib/monkey_patching/file.rb +131 -0
  42. data/lib/monkey_patching/subsumed.rb +24 -0
  43. data/lib/ruby_class_extensions.rb +4 -0
  44. data/lib/salsa_tiger_xml/corpus.rb +24 -0
  45. data/lib/salsa_tiger_xml/fe_node.rb +98 -0
  46. data/lib/salsa_tiger_xml/file_parts_parser.rb +214 -0
  47. data/lib/salsa_tiger_xml/frame_node.rb +145 -0
  48. data/lib/salsa_tiger_xml/graph_node.rb +347 -0
  49. data/lib/salsa_tiger_xml/reg_xml.rb +285 -0
  50. data/lib/salsa_tiger_xml/salsa_tiger_sentence.rb +596 -0
  51. data/lib/salsa_tiger_xml/salsa_tiger_sentence_graph.rb +333 -0
  52. data/lib/salsa_tiger_xml/salsa_tiger_sentence_sem.rb +438 -0
  53. data/lib/salsa_tiger_xml/salsa_tiger_xml_helper.rb +84 -0
  54. data/lib/salsa_tiger_xml/salsa_tiger_xml_node.rb +161 -0
  55. data/lib/salsa_tiger_xml/sem_node.rb +58 -0
  56. data/lib/salsa_tiger_xml/string_terminals_in_right_order.rb +192 -0
  57. data/lib/salsa_tiger_xml/syn_node.rb +169 -0
  58. data/lib/salsa_tiger_xml/tree_node.rb +59 -0
  59. data/lib/salsa_tiger_xml/ts_syn_node.rb +47 -0
  60. data/lib/salsa_tiger_xml/usp_node.rb +72 -0
  61. data/lib/salsa_tiger_xml/xml_node.rb +163 -0
  62. data/lib/shalmaneser/lib.rb +1 -0
  63. data/lib/tabular_format/fn_tab_format_file.rb +38 -0
  64. data/lib/tabular_format/fn_tab_frame.rb +67 -0
  65. data/lib/tabular_format/fn_tab_sentence.rb +169 -0
  66. data/lib/tabular_format/tab_format_file.rb +91 -0
  67. data/lib/tabular_format/tab_format_named_args.rb +184 -0
  68. data/lib/tabular_format/tab_format_sentence.rb +119 -0
  69. data/lib/value_restriction.rb +49 -0
  70. metadata +131 -0
@@ -0,0 +1,210 @@
1
+ ##############################
2
+ # ConfigFormatElement is an auxiliary class
3
+ # of ConfigData.
4
+ # It keeps track of feature patterns with variables in them
5
+ # that can be instantiated.
6
+ # @author Andrei Beliankou
7
+ #
8
+
9
+ require_relative 'configuration_error'
10
+
11
module Shalmaneser
  module Configuration
    # A feature-name pattern that may contain variables written as <name>.
    #
    # The pattern string is split once, at construction time, into fixed
    # parts and variables. Afterwards the pattern can be instantiated to a
    # concrete string (#instantiate) or matched against a concrete string to
    # recover the variable values (#match).
    class ConfigFormatElement
      # Analyze a pattern and remember the permitted variable names.
      #
      # @param string [String] feature name, may include names of variables
      #   enclosed in <>
      # @param variables [Array<String>] variable names that can occur
      # @raise [ConfigurationError] on nested "<", stray ">", unclosed "<"
      #   or a variable name that is not listed in +variables+
      def initialize(string, variables)
        @variables = variables

        # pattern: this is what 'string' is split into,
        # an array of elements that are either fixed parts or variables.
        # fixed part: pair [item:string, "string"]
        # variable:   pair [variable_name:string, "variable"]
        @pattern = parse_pattern(string)

        # regexp for matching this pattern when no fillers are given
        @regexp = make_regexp(@pattern)
      end

      # Replace every variable in the pattern by its value.
      #
      # @param var_hash [Hash] variable name(string) => variable value(string)
      # @return [String] the pattern with all variables filled in
      # @raise [ConfigurationError] if a variable of the pattern has no
      #   (non-nil) value in +var_hash+
      def instantiate(var_hash)
        @pattern.map do |item, kind|
          case kind
          when 'string'
            item
          when 'variable'
            value = var_hash[item]
            raise ConfigurationError, "Missing variable instantiation: #{item}." if value.nil?

            value
          else
            raise ConfigurationError, "Shouldn't be here!"
          end
        end.join
      end

      # Try to match a string against the pattern, with the variables named
      # in +fillers+ fixed to the given values.
      #
      # @param string [String] the string to match
      # @param fillers [Hash, nil] variable name(string) => value(string)
      # @return [Hash, false] if the string matches, a hash
      #   variable name => value that includes the given fillers as well as
      #   values for all other variables mentioned in the pattern;
      #   false if there is no match
      # @raise [ConfigurationError] if the regexp matched but yielded fewer
      #   groups than there are unfilled variables
      def match(string, fillers = nil)
        regexp = fillers ? make_regexp(@pattern, fillers) : @regexp
        match_data = regexp.match(string)
        return false if match_data.nil?

        # retv: variable name(string) => value(string),
        # seeded with the fillers we were given
        retv = {}
        fillers.each_pair { |name, val| retv[name] = val } if fillers

        # Variables that were not pre-filled correspond, in order, to the
        # capture groups of the regexp; groups are numbered from 1.
        index = 1
        @pattern.each do |item, kind|
          next unless kind == 'variable'
          next unless fillers.nil? || fillers[item].nil?

          captured = match_data[index]
          if captured.nil?
            raise ConfigurationError, "Match, but not enough matched elements? Strange."
          end

          if retv[item].nil?
            retv[item] = captured
          elsif retv[item] != captured
            # the same variable occurs twice with different values: no match
            return false
          end

          index += 1
        end

        retv
      end

      # @return [Array<String>] the variable names used in the pattern,
      #   in order of occurrence (repeated if a variable occurs repeatedly)
      def used_variables
        @pattern.select { |_item, kind| kind == 'variable' }.map(&:first)
      end

      ####################
      private

      # Split a pattern string into fixed parts and variables.
      #
      # @param string [String] the pattern, variables enclosed in <>
      # @return [Array] pairs [string, "string"] or [string, "variable"]
      # @raise [ConfigurationError] on malformed brackets or unknown variables
      def parse_pattern(string)
        pattern = []
        inside = false # are we currently between "<" and ">"?
        item = ''.dup  # text collected since the last bracket

        string.each_char do |char|
          case char
          when '<'
            raise ConfigurationError, "Duplicate < in #{string}." if inside

            pattern << [item, 'string'] unless item.empty?
            item = ''.dup
            inside = true
          when '>'
            raise ConfigurationError, "Unexpected > in #{string}." unless inside
            raise ConfigurationError, "Unknown variable #{item}." unless @variables.include?(item)

            pattern << [item, 'variable']
            item = ''.dup
            inside = false
          else
            item << char
          end
        end

        # the whole string has been read: we must not be inside <> anymore
        raise ConfigurationError, "Unclosed < in #{string}." if inside

        # last fixed bit still to be recorded?
        pattern << [item, 'string'] unless item.empty?
        pattern
      end

      # Make a regular expression from a pattern together with some
      # variable fillers. Fixed parts and filled variables are matched
      # literally; every unfilled variable becomes one capture group.
      #
      # @return [Regexp] object anchored at both ends of the whole string
      # @param [Array] pattern An array of pairs [string, "string"] or [string, "variable"]
      # @param [Hash] fillers A Hash variable name(string) => value(string)
      # @raise [ConfigurationError] if a pattern entry is neither kind
      def make_regexp(pattern, fillers = nil)
        source = pattern.map do |item, kind|
          case kind
          when 'variable'
            fillers && fillers[item] ? Regexp.escape(fillers[item]) : '(.+)'
          when 'string'
            Regexp.escape(item)
          else
            # @todo Find the source of this error.
            raise ConfigurationError, "Shouldn't be here"
          end
        end.join

        # \A and \z anchor to the whole string; ^ and $ would only anchor
        # to a line and let multi-line strings match on a single line.
        Regexp.new("\\A#{source}\\z")
      end
    end
  end
end
@@ -0,0 +1,15 @@
1
module Shalmaneser
  module Configuration
    # Exception raised by the configuration classes. Optionally wraps
    # another exception, whose class and message are prepended to the
    # message of this one.
    class ConfigurationError < StandardError
      # @param [String] msg A custom message for this exception.
      # @param [Exception] nested_exception An external exception
      #   which is reused to provide more information.
      def initialize(msg = nil, nested_exception = nil)
        if nested_exception
          nested = "#{nested_exception.class}: #{nested_exception.message}"
          # Without a custom message the nested description stands alone;
          # otherwise the message would end in a dangling newline.
          msg = msg.to_s.empty? ? nested : "#{nested}\n#{msg}"
        end
        super(msg)
      end
    end
  end
end
@@ -0,0 +1,56 @@
1
+ # ExternalConfigData
2
+ # Katrin Erk January 2006
3
+ #
4
+ # All scripts that compute additional external knowledge sources
5
+ # for Fred and Rosy:
6
+ # access to configuration and experiment description file
7
+
8
+ require_relative 'config_data'
9
+
10
+ ##############################
11
+ # Class ExternalConfigData
12
+ #
13
+ # inherits from ConfigData,
14
+ # sets variable names appropriate to tasks of external knowledge sources
15
module Shalmaneser
  module Configuration
    # Configuration data for the scripts that compute additional external
    # knowledge sources. Declares the features such an experiment file may
    # contain; no variables are used in feature names.
    class ExternalConfigData < ConfigData
      # @param filename [String] the config file, handed on to ConfigData
      def initialize(filename)
        # feature name => feature type
        feature_types = {
          'directory' => 'string',

          'experiment_id' => 'string',

          'gfmap_restrict_to_downpath' => 'bool',
          'gfmap_restrict_pathlen' => 'integer',
          'gfmap_remove_gf' => 'list'
        }
        variable_names = []

        super(filename, feature_types, variable_names)

        # the list feature is read through access_as_stringlist
        set_list_feature_access('gfmap_remove_gf', method(:access_as_stringlist))
      end

      ###
      protected

      # Access function for list features of the form
      #
      #   lhs = rhs1 rhs2 ... rhsN
      #
      # Each entry of val_list is a string tuple [rhs1, ..., rhsN]; its
      # strings are joined by single spaces into "rhs1 rhs2 ... rhsN".
      #
      # @param val_list [Array<Array<String>>] one tuple per entry
      # @return [Array<String>] one space-joined string per tuple
      def access_as_stringlist(val_list)
        val_list.map { |rhs_tokens| rhs_tokens.join(' ') }
      end
    end
  end
end
@@ -0,0 +1,134 @@
1
+ # FrappeConfigData
2
+ # Katrin Erk July 05
3
+ #
4
+ # Preprocessing for Fred and Rosy:
5
+ # access to a configuration and experiment description file
6
+
7
+ require_relative 'config_data'
8
+
9
+ ##############################
10
+ # Class FrappeConfigData
11
+ #
12
+ # inherits from ConfigData,
13
+ # sets variable names appropriate to preprocessing task
14
module Shalmaneser
  module Configuration
    # Configuration data for the preprocessing task. Declares the features
    # a preprocessing experiment file may contain and validates their
    # values right after loading.
    class FrappeConfigData < ConfigData
      # nil is accepted: the encoding may be left unset
      VALID_ENCODINGS = ['hex', 'iso', 'utf8', nil].freeze
      VALID_INPUT_FORMATS = %w(Plain SalsaTab FNXml FNCorpusXml SalsaTigerXML).freeze

      # feature name => feature type
      # NOTE(review): left unfrozen because it is handed to
      # ConfigData#initialize, which is not visible here - confirm that it
      # does not modify the hash before freezing it.
      CONFIG_DEFS = {
        "prep_experiment_ID" => "string", # experiment identifier
        "frprep_directory" => "string", # dir for frprep internal data
        # information about the dataset
        "language" => "string", # en, de
        "origin" => "string", # FrameNet, Salsa, or nothing
        "format" => "string", # Plain, SalsaTab, FNXml, FNCorpusXml, SalsaTigerXML
        "encoding" => "string", # utf8, iso, hex, or nothing

        # directories
        "directory_input" => "string", # dir with input data
        "directory_preprocessed" => "string", # dir with output Salsa/Tiger XML data
        "directory_parserout" => "string", # dir with parser output for the parser named below

        # syntactic processing
        "pos_tagger" => "string", # name of POS tagger
        "lemmatizer" => "string", # name of lemmatizer
        "parser" => "string", # name of parser
        "pos_tagger_path" => "string", # path to POS tagger
        "lemmatizer_path" => "string", # path to lemmatizer
        "parser_path" => "string", # path to parser
        "parser_max_sent_num" => "integer", # max number of sentences per parser input file
        "parser_max_sent_len" => "integer", # max sentence length the parser handles

        "do_parse" => "bool", # use parser?
        "do_lemmatize" => "bool", # use lemmatizer?
        "do_postag" => "bool", # use POS tagger?

        # output format: if tabformat_output == true,
        # output in Tab format rather than Salsa/Tiger XML
        # (this will not work if do_parse == true)
        "tabformat_output" => "bool",

        # syntactic repairs, dependent on existing semantic role annotation
        "fe_syn_repair" => "bool", # map words to constituents for FEs: idealize?
        "fe_rel_repair" => "bool", # FEs: include non-included relative clauses into FEs
      }

      # Load the experiment file using CONFIG_DEFS as the list of
      # configuration definitions (no variables), then validate the values.
      #
      # @param filename [String] path to a config file
      # @raise [ConfigurationError] if validation finds any problem
      def initialize(filename)
        super(filename, CONFIG_DEFS, [])
        validate
      end

      # @return [True, False]
      # Shall we convert our input files into the target encoding?
      # True for every encoding setting other than 'utf8', including unset.
      def convert_encoding?
        get('encoding') != 'utf8'
      end

      private

      # Validates semantically the input values from the experiment file.
      # All problems found are collected and reported together, one per
      # line, in a single exception.
      #
      # @todo Rework the whole validation engine, the parameter definitions
      #   should entail the information about: optional, obligatory,
      #   in combination with. This information should be stored in external
      #   resource files to easily change them.
      # @raise [ConfigurationError] if at least one check fails
      def validate
        msg = []

        unless get('frprep_directory')
          msg << 'Please set <frprep_directory>, the Frappe internal data '\
                 'directory, in the experiment file.'
        end

        unless get('directory_input')
          msg << 'Please specify <directory_input> in the Frappe experiment file.'
        end

        unless get('directory_preprocessed')
          msg << 'Please specify <directory_preprocessed> in the experiment file.'
        end

        # sanity check: output in tab format will not work
        # if we also do a parse
        if get('tabformat_output') && get('do_parse')
          msg << 'Error: Cannot do Tab format output when the input text is being '\
                 'parsed. Please set either <tabformat_output> or <do_parse> to false.'
        end

        # each processing step needs both the tool name and the tool path
        if get('do_postag') && !(get('pos_tagger_path') && get('pos_tagger'))
          msg << 'POS Tagging: I need <pos_tagger> and <pos_tagger_path> '\
                 'in the experiment file.'
        end

        if get('do_lemmatize') && !(get('lemmatizer_path') && get('lemmatizer'))
          msg << 'Lemmatization: I need <lemmatizer> and <lemmatizer_path> in the experiment file.'
        end

        if get('do_parse') && !(get('parser_path') && get('parser'))
          msg << 'Parsing: I need <parser> and <parser_path> in the experiment file.'
        end

        unless VALID_ENCODINGS.include?(get('encoding'))
          msg << 'Please define a correct encoding in the configuration file: '\
                 "<#{VALID_ENCODINGS.join('>, <')}>!"
        end

        unless VALID_INPUT_FORMATS.include?(get('format'))
          msg << 'Please define a correct input format in the configuration file: '\
                 "<#{VALID_INPUT_FORMATS.join('>, <')}>!"
        end

        # \A and \z check the whole value; ^ and $ would accept any value
        # that merely contains one alphanumeric line.
        unless get("prep_experiment_ID") =~ /\A[A-Za-z0-9_]+\z/
          msg << 'Please choose an alphanumeric experiment ID! '\
                 "You provided: #{get('prep_experiment_ID')}"
        end

        raise(ConfigurationError, msg.join("\n")) if msg.any?
      end
    end
  end
end
@@ -0,0 +1,199 @@
1
+ # FredConfigData
2
+ # Katrin Erk April 05
3
+ #
4
+ # Frame disambiguation system:
5
+ # access to a configuration and experiment description file
6
+
7
+ require_relative 'config_data'
8
+ require 'definitions'
9
+ require 'logging'
10
+
11
+ ##############################
12
+ # Class FredConfigData
13
+ #
14
+ # inherits from ConfigData,
15
+ # sets variable names appropriate to WSD task
16
module Shalmaneser
  module Configuration
    # Configuration data for the WSD (frame disambiguation) task. Declares
    # the features an experiment file may contain and installs the access
    # functions for the list features 'classifier' and 'feature'.
    class FredConfigData < ConfigData
      VALID_TASKS = %w(featurize refeaturize split test eval)

      # feature name => feature type
      CONFIG_DEFS = {
        "experiment_ID" => "string", # experiment ID
        "preproc_descr_file_train" => "string", # path to preprocessing files
        "preproc_descr_file_test" => "string",
        "directory_output" => "string", # path to Salsa/Tiger XML output directory

        # @todo Verbosity should be handled by the Logger and only via cmd switches.
        "verbose" => "bool", # print diagnostic messages?
        "apply_to_all_known_targets" => "bool", # apply to all known targets rather than the ones with a frame?

        "fred_directory" => "string", # directory for internal info
        "classifier_dir" => "string", # write classifiers here

        "classifier" => "list", # classifiers

        "dbtype" => "string", # "mysql" or "sqlite"

        # DB access. The original comment read "sqlite only".
        # NOTE(review): host/user/passwd look like settings for a database
        # server rather than for sqlite - confirm which dbtype uses them.
        "host" => "string",
        "user" => "string",
        "passwd" => "string",
        "dbname" => "string",

        # featurization info
        "feature" => "list", # which features to use for the classifier?
        "binary_classifiers" => "bool", # make binary rather than n-ary classifiers?
        "negsense" => "string", # binary classifier: negative sense is..?
        "numerical_features" => "string", # do what with numerical features?

        # what to do with items that have multiple senses?
        # 'binarize': binary classifiers, and consider positive
        #             if the sense is among the gold senses
        # 'join'    : make one joint sense
        # 'repeat'  : make multiple occurrences of the item, one sense per occ
        # 'keep'    : keep as separate labels
        #
        # multilabel: consider as assigned all labels
        # above a certain confidence threshold?
        "handle_multilabel" => "string",
        "assignment_confidence_threshold" => "float",

        # single-sentence context?
        "single_sent_context" => "bool",

        # noncontiguous input? then we need access to a larger corpus
        "noncontiguous_input" => "bool",
        "larger_corpus_dir" => "string",
        "larger_corpus_format" => "string",
        "larger_corpus_encoding" => "string",

        # Imported from PrepConfigData
        'do_postag' => 'bool',
        'do_lemmatize' => 'bool',
        'do_parse' => 'bool',
        'pos_tagger' => 'string',
        'lemmatizer' => 'string',
        'parser' => 'string',
        'directory_preprocessed' => 'string',
        'language' => 'string'
      }

      # Load the experiment file; feature names may use the variables
      # "train" and "exp_ID".
      #
      # @param filename [String] the config file, handed on to ConfigData
      def initialize(filename)
        super(filename, CONFIG_DEFS, ["train", "exp_ID"])
        # set access functions for list features
        set_list_feature_access("classifier", method("access_classifier"))
        set_list_feature_access("feature", method("access_feature"))
        validate
      end

      ###
      # protected

      #####
      # access_feature
      #
      # access function for feature 'feature'
      #
      # assumed format:
      #
      # feature = context 50
      # feature = context 2
      # feature = syn
      #
      # i.e. first the name of the feature type to use, then
      # optionally a parameter,
      # and the same feature can occur more than once (which makes sense
      # only in case of parameters)
      #
      # A nil val_list is treated like an empty list, in line with
      # access_classifier.
      #
      # @param val_list [Array<Array<String>>, nil] list of tuples defined
      #   in the config file for feature 'feature'
      # @param feature [String, nil] feature type name
      # @return [nil, true, Array]
      #   - If a feature is given as a parameter:
      #     - If the feature is not set in the experiment file, nil
      #     - If the feature is set and has a parameter, the list of
      #       parameter values set for it, converted with to_i
      #     - If the feature is set and has no parameter, true
      #   - If no feature is given as parameter:
      #     a list of all features that have been set in the experiment file.
      #     Each feature is given as a tuple: the first element is the
      #     feature (a string), all further elements are options (integers)
      def access_feature(val_list, feature = nil)
        entries = val_list || []

        unless feature
          # return all features that have been set
          return entries.map do |feature_name, *options|
            [feature_name] + options.map(&:to_i)
          end
        end

        # first parameter of every tuple that names this feature
        # (nil where the feature was given without a parameter)
        parameters = entries.select { |tuple| tuple.first == feature }
                            .map { |tuple| tuple[1] }

        # feature not defined
        return nil if parameters.empty?
        # feature defined, but no parameters
        return true if parameters.compact.empty?

        # feature defined, and has values
        parameters.map(&:to_i)
      end

      #####
      # access_classifier
      #
      # access function for feature 'classifier'
      #
      # assumed format in the config file:
      #
      # feature = path [option]*
      #
      # i.e. first the name of the feature type to use, then
      # optionally options associated with that feature,
      # e.g. 'argrec': use that feature only when computing argrec
      #
      # the access function is called with parameter val_list, an array of
      # string tuples, one string tuple for each feature defined.
      # the first string in the tuple is the feature name, the rest are the options
      #
      # @param val_list [Array, nil] array:array:string: list of tuples
      #   defined in config file for feature 'classifier'
      # @return [Array] a list of pairs
      #   [feature_name(string), options(array:string)] of defined features;
      #   empty if val_list is nil
      def access_classifier(val_list)
        return [] if val_list.nil?

        val_list.map { |name, *options| [name, options] }
      end

      private

      # Collects validation messages and raises if there are any.
      # Currently no checks are active, so this never raises.
      #
      # @todo The check that used to be commented out here tested
      #   get('encoding') against VALID_TASKS and built its message from
      #   VALID_ENCODINGS, a constant this class does not define. Decide
      #   what should be validated (VALID_TASKS is otherwise unused in this
      #   class) and add the checks.
      # @raise [ConfigurationError] if at least one check fails
      def validate
        msg = []

        raise(ConfigurationError, msg.join("\n")) if msg.any?
      end
    end
  end
end