shalmaneser-lib 1.2.rc5

Sign up to get free protection for your applications and to get access to all the features.
Files changed (70) hide show
  1. checksums.yaml +7 -0
  2. data/.yardopts +10 -0
  3. data/CHANGELOG.md +4 -0
  4. data/LICENSE.md +4 -0
  5. data/README.md +122 -0
  6. data/lib/configuration/config_data.rb +457 -0
  7. data/lib/configuration/config_format_element.rb +210 -0
  8. data/lib/configuration/configuration_error.rb +15 -0
  9. data/lib/configuration/external_config_data.rb +56 -0
  10. data/lib/configuration/frappe_config_data.rb +134 -0
  11. data/lib/configuration/fred_config_data.rb +199 -0
  12. data/lib/configuration/rosy_config_data.rb +126 -0
  13. data/lib/db/db_interface.rb +50 -0
  14. data/lib/db/db_mysql.rb +141 -0
  15. data/lib/db/db_sqlite.rb +280 -0
  16. data/lib/db/db_table.rb +237 -0
  17. data/lib/db/db_view.rb +416 -0
  18. data/lib/db/db_wrapper.rb +175 -0
  19. data/lib/db/select_table_and_columns.rb +10 -0
  20. data/lib/db/sql_query.rb +243 -0
  21. data/lib/definitions.rb +19 -0
  22. data/lib/eval.rb +482 -0
  23. data/lib/ext/maxent/Classify.class +0 -0
  24. data/lib/ext/maxent/Train.class +0 -0
  25. data/lib/external_systems.rb +251 -0
  26. data/lib/framenet_format/fn_corpus_aset.rb +209 -0
  27. data/lib/framenet_format/fn_corpus_xml_file.rb +120 -0
  28. data/lib/framenet_format/fn_corpus_xml_sentence.rb +299 -0
  29. data/lib/framenet_format/fn_database.rb +143 -0
  30. data/lib/framenet_format/frame_xml_file.rb +104 -0
  31. data/lib/framenet_format/frame_xml_sentence.rb +411 -0
  32. data/lib/logging.rb +25 -0
  33. data/lib/ml/classifier.rb +189 -0
  34. data/lib/ml/mallet.rb +236 -0
  35. data/lib/ml/maxent.rb +229 -0
  36. data/lib/ml/optimize.rb +195 -0
  37. data/lib/ml/timbl.rb +140 -0
  38. data/lib/monkey_patching/array.rb +82 -0
  39. data/lib/monkey_patching/enumerable_bool.rb +24 -0
  40. data/lib/monkey_patching/enumerable_distribute.rb +18 -0
  41. data/lib/monkey_patching/file.rb +131 -0
  42. data/lib/monkey_patching/subsumed.rb +24 -0
  43. data/lib/ruby_class_extensions.rb +4 -0
  44. data/lib/salsa_tiger_xml/corpus.rb +24 -0
  45. data/lib/salsa_tiger_xml/fe_node.rb +98 -0
  46. data/lib/salsa_tiger_xml/file_parts_parser.rb +214 -0
  47. data/lib/salsa_tiger_xml/frame_node.rb +145 -0
  48. data/lib/salsa_tiger_xml/graph_node.rb +347 -0
  49. data/lib/salsa_tiger_xml/reg_xml.rb +285 -0
  50. data/lib/salsa_tiger_xml/salsa_tiger_sentence.rb +596 -0
  51. data/lib/salsa_tiger_xml/salsa_tiger_sentence_graph.rb +333 -0
  52. data/lib/salsa_tiger_xml/salsa_tiger_sentence_sem.rb +438 -0
  53. data/lib/salsa_tiger_xml/salsa_tiger_xml_helper.rb +84 -0
  54. data/lib/salsa_tiger_xml/salsa_tiger_xml_node.rb +161 -0
  55. data/lib/salsa_tiger_xml/sem_node.rb +58 -0
  56. data/lib/salsa_tiger_xml/string_terminals_in_right_order.rb +192 -0
  57. data/lib/salsa_tiger_xml/syn_node.rb +169 -0
  58. data/lib/salsa_tiger_xml/tree_node.rb +59 -0
  59. data/lib/salsa_tiger_xml/ts_syn_node.rb +47 -0
  60. data/lib/salsa_tiger_xml/usp_node.rb +72 -0
  61. data/lib/salsa_tiger_xml/xml_node.rb +163 -0
  62. data/lib/shalmaneser/lib.rb +1 -0
  63. data/lib/tabular_format/fn_tab_format_file.rb +38 -0
  64. data/lib/tabular_format/fn_tab_frame.rb +67 -0
  65. data/lib/tabular_format/fn_tab_sentence.rb +169 -0
  66. data/lib/tabular_format/tab_format_file.rb +91 -0
  67. data/lib/tabular_format/tab_format_named_args.rb +184 -0
  68. data/lib/tabular_format/tab_format_sentence.rb +119 -0
  69. data/lib/value_restriction.rb +49 -0
  70. metadata +131 -0
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 925773238e9b659fbfec8c118b4a22f4f2fb909b
4
+ data.tar.gz: da6bbfc8e3b8d33cc45ec17cd8c9c39044db86e2
5
+ SHA512:
6
+ metadata.gz: abeebd6acb3c45fe07bb13e5e281b711b30c296b0d4546fad9b8d0ce11dbc4617241efffaf56066b3bc49ac8216bf6ccae6f8a3bc12ad788129f111d63e609d2
7
+ data.tar.gz: 1d1e18b0bd144fe5aaa3058fed3e6fe5e5d80648c43e6d3e175daaf97d1ce491dbf80e7aceec9be387a65d2d6dade1d6b5d7407302442446d46a7f9afb473c61
@@ -0,0 +1,10 @@
1
+ --private
2
+ --protected
3
+ --title 'SHALMANESER'
4
+ lib/**/*.rb
5
+ bin/**/*
6
+ doc/**/*.md
7
+ -
8
+ CHANGELOG.md
9
+ LICENSE.md
10
+ doc/index.md
@@ -0,0 +1,4 @@
1
+ # Versions
2
+
3
+ ## Version 1.2.0-rc1
4
+
@@ -0,0 +1,4 @@
1
+ # LICENSE
2
+
3
+ This software is written in Ruby and is released under the [GNU Public License](http://www.gnu.org/licenses/gpl-2.0.html) (GPL v2), and the documentation under the [Free Document License](http://www.gnu.org/licenses/old-licenses/fdl-1.2.html) (FDL v1.2).
4
+
@@ -0,0 +1,122 @@
1
+ # SHALMANESER
2
+
3
+ [RubyGems](http://rubygems.org/gems/shalmaneser) |
4
+ [Shalmanesers Project Page](http://bu.chsta.be/projects/shalmaneser/) |
5
+ [Source Code](https://github.com/arbox/shalmaneser) |
6
+ [Bug Tracker](https://github.com/arbox/shalmaneser/issues)
7
+
8
+
9
+ [![Gem Version](https://img.shields.io/gem/v/shalmaneser.svg)](https://rubygems.org/gems/shalmaneser)
10
+ [![Gem Version](https://img.shields.io/gem/v/frprep.svg)](https://rubygems.org/gems/shalmaneser-prep)
11
+ [![Gem Version](https://img.shields.io/gem/v/fred.svg)](https://rubygems.org/gems/shalmaneser-fred)
12
+ [![Gem Version](https://img.shields.io/gem/v/rosy.svg)](https://rubygems.org/gems/shalmaneser-rosy)
13
+
14
+
15
+ [![License GPL 2](http://img.shields.io/badge/License-GPL%202-green.svg)](http://www.gnu.org/licenses/gpl-2.0.txt)
16
+ [![Build Status](https://img.shields.io/travis/arbox/shalmaneser.svg?branch=1.2)](https://travis-ci.org/arbox/shalmaneser)
17
+ [![Code Climate](https://img.shields.io/codeclimate/github/arbox/shalmaneser.svg)](https://codeclimate.com/github/arbox/shalmaneser)
18
+ [![Dependency Status](https://img.shields.io/gemnasium/arbox/shalmaneser.svg)](https://gemnasium.com/arbox/shalmaneser)
19
+
20
+ [SHALMANESER](http://www.coli.uni-saarland.de/projects/salsa/shal/) is a SHALlow seMANtic parSER.
21
+
22
+ The name Shalmaneser is borrowed from John Brunner. He describes in his novel
23
+ "Stand on Zanzibar" an all knowing supercomputer baptized Shalmaneser.
24
+
25
+ Shalmaneser also has other origins like the king [Shalmaneser III](https://en.wikipedia.org/wiki/Shalmaneser_III).
26
+
27
+ > "SCANALYZER is the one single, the ONLY study of the news in depth
28
+ > that’s processed by General Technics’ famed computer Shalmaneser,
29
+ > who sees all, hears all, knows all save only that which YOU, Mr. and Mrs.
30
+ > Everywhere, wish to keep to yourselves." <br/>
31
+ > John Brunner (1968) "Stand on Zanzibar"
32
+
33
+ > But Shalmaneser is a Micryogenic® computer bathed in liquid helium and it’s cold in his vault. <br/>
34
+ > John Brunner (1968) "Stand on Zanzibar"
35
+
36
+ > “Of course not. Shalmaneser’s main task is to achieve the impossible again, a routine undertaking here at GT.” <br/>
37
+ > John Brunner (1968) "Stand on Zanzibar"
38
+
39
+ > “They programmed Shalmaneser with the formula for this stiffener, see, and…” <br/>
40
+ > John Brunner (1968) "Stand on Zanzibar"
41
+
42
+ > What am I going to do now? <br/>
43
+ > “All right, Shalmaneser!” <br/>
44
+ > John Brunner (1968) "Stand on Zanzibar"
45
+
46
+ > Shalmaneser is a Micryogenic® computer bathed in liquid helium and there’s no sign of Teresa. <br/>
47
+ > John Brunner (1968) "Stand on Zanzibar"
48
+
49
+ > Bathed in his currents of liquid helium, self-contained, immobile, vastly well informed by every mechanical sense: Shalmaneser. <br/>
50
+ > John Brunner (1968) "Stand on Zanzibar"
51
+
52
+ ## Description
53
+
54
+ Please be careful, the whole thing is under construction! For now Shalmaneser is not intended to run on Windows systems since it heavily uses system calls for external invocations.
55
+ Current versions of Shalmaneser have been tested on Linux only (other *NIX testers are welcome!).
56
+
57
+ Shalmaneser is a supervised learning toolbox for shallow semantic parsing, i.e. the automatic assignment of semantic classes and roles to text. This technique is often called [SRL](https://en.wikipedia.org/wiki/Semantic_role_labeling) (Semantic Role Labelling). The system was developed for Frame Semantics; thus we use Frame Semantics terminology and call the classes frames and the roles frame elements. However, the architecture is reasonably general, and with a certain amount of adaption, Shalmaneser should be usable for other paradigms (e.g., PropBank roles) as well. Shalmaneser caters both for end users, and for researchers.
58
+
59
+ For end users, we provide a simple end user mode which can simply apply the pre-trained classifiers
60
+ for [English](http://www.coli.uni-saarland.de/projects/salsa/shal/index.php?nav=download) (FrameNet 1.3 annotation / Collins parser)
61
+ and [German](http://www.coli.uni-saarland.de/projects/salsa/shal/index.php?nav=download) (SALSA 1.0 annotation / Sleepy parser).
62
+
63
+ We'll try to provide newer pretrained models for English, German, and possibly other languages as soon as possible.
64
+
65
+ For researchers interested in investigating shallow semantic parsing, our system is extensively configurable and extendable.
66
+
67
+ ## Origin
68
+
69
+ The original version of Shalmaneser was written by Sebastian Padó, Katrin Erk, Alexander Koller, Ines Rehbein, Aljoscha Burchardt and others during their work in the SALSA Project.
70
+
71
+ You can find original versions of Shalmaneser up to ``1.1`` on the [SALSA](http://www.coli.uni-saarland.de/projects/salsa/shal/) project page.
72
+
73
+ ## Publications on Shalmaneser
74
+
75
+ - K. Erk and S. Padó: Shalmaneser - a flexible toolbox for semantic role assignment. Proceedings of LREC 2006, Genoa, Italy. [Click here for details](http://www.nlpado.de/~sebastian/pub/papers/lrec06_erk.pdf).
76
+
77
+ - TODO: add other works
78
+
79
+ ## Documentation
80
+
81
+ The project documentation can be found in our [doc](https://github.com/arbox/shalmaneser/blob/master/doc/index.md) folder.
82
+
83
+ ## Development
84
+
85
+ We are working now only on the `master` branch. For different intermediate versions see corresponding tags.
86
+
87
+ ## Installation
88
+
89
+ See the installation instructions in the [doc](https://github.com/arbox/shalmaneser/blob/master/doc/index.md#installation) folder.
90
+
91
+ ### Tokenizers
92
+
93
+ - [Ucto](http://ilk.uvt.nl/ucto/)
94
+
95
+ ### POS Taggers
96
+
97
+ - [TreeTagger](http://www.cis.uni-muenchen.de/~schmid/tools/TreeTagger/)
98
+
99
+ ### Lemmatizers
100
+
101
+ - [TreeTagger](http://www.cis.uni-muenchen.de/~schmid/tools/TreeTagger/)
102
+
103
+ ### Parsers
104
+
105
+ - [BerkeleyParser](https://github.com/slavpetrov/berkeleyparser)
106
+ - [Stanford Parser](http://nlp.stanford.edu/software/lex-parser.shtml)
107
+ - [Collins Parser](http://www.cs.columbia.edu/~mcollins/code.html)
108
+
109
+ ### Machine Learning Systems
110
+
111
+ - [OpenNLP MaxEnt](http://sourceforge.net/projects/maxent/files/Maxent/2.4.0/)
112
+ - [Mallet](http://mallet.cs.umass.edu/index.php)
113
+
114
+ ## License
115
+
116
+ Shalmaneser is released under the `GPL v. 2.0` license, as chosen by the initial authors.
117
+
118
+ For a local copy of the full license text see the [LICENSE](LICENSE.md) file.
119
+
120
+ ## Contributing
121
+
122
+ Feel free to contact me via Github. Open an issue if you see problems or need help.
@@ -0,0 +1,457 @@
1
+ # class ConfigData:
2
+ #
3
+ # reads config data file,
4
+ # matches it against feature declarations given in its new() method,
5
+ # offers access methods for different kinds of features
6
+ #
7
+ # In the config file, all feature specifications have the form
8
+ #
9
+ # feature_name = feature_value
10
+ #
11
+ # where feature_name is a string without spaces. feature_value
12
+ # may include spaces, depending on the feature type (see below).
13
+ #
14
+ # To include a comment in a config file, start the comment line with
15
+ # '#'.
16
+ #
17
+ # Features are typed. The following types are supported:
18
+ #
19
+ # - normal types:
20
+ # "bool", "float", "integer", "string"
21
+ # For the get() function with which features in the ConfigData object
22
+ # are accessed, the values are transformed from the strings in the
23
+ # config file to the appropriate class: Boolean, Float, Integer, String
24
+ #
25
+ # - other types:
26
+ # pattern: This is a feature that may include variables in
27
+ # <> brackets. When this feature is accessed,
28
+ # values for these variables are given, i.e. this
29
+ # pattern has to be instantiated.
30
+ # For example, given a feature
31
+ #
32
+ # fileformat = features.<type>.train
33
+ #
34
+ # and method call
35
+ # instantiate("fileformat", "type" => "path")
36
+ #
37
+ # what is returned is a string "features.path.train"
38
+ #
39
+ # Variables used in a pattern have to be declared to
40
+ # the new() method.
41
+ #
42
+ # list: This is the only feature type where more than one
43
+ # feature specification with the same feature_name is allowed.
44
+ # The right-hand sides of a list feature are stored in an array.
45
+ #
46
+ # Given a 'list' feature 'bla', if the config file contains
47
+ #
48
+ # bla = blupp 1 2
49
+ # bla = la di da
50
+ #
51
+ # the list feature 'bla' is represented as follows:
52
+ # @features['bla'] = [['blupp', 1,2], ['la', 'di', 'da']]
53
+ #
54
+ # For comfortable access to a list feature, arbitrary
55
+ # access functions for list features can be defined.
56
+ #
57
+ #
58
+
59
+ require_relative 'config_format_element'
60
+ require_relative 'configuration_error'
61
+ require 'ruby_class_extensions'
62
+ require 'logging'
63
+
64
+ #####################################################
65
+ ####################################################
66
+ # ConfigData is the main class in this package.
67
+ # It manages config files.
68
+ #
69
+ # To use it, inherit from it and just make a new new() method
70
+ # that only takes as input the name of the config file
71
+ # and that declares all the feature types and variable names
72
+ # needed for the given application.
73
+ #
74
+ # @abstract Subclass and override {#initialize} to implement
75
+ # a custom ConfigData class.
76
module Shalmaneser
  module Configuration
    # Reads a configuration ("experiment") file and checks every entry against
    # the feature declarations passed to {#initialize}.
    #
    # Each non-empty, non-comment line of the file has the form
    #
    #   feature_name = feature_value
    #
    # Supported feature types are "bool", "float", "integer", "string",
    # "pattern" (a string with <variable> placeholders) and "list" (the only
    # type that may occur several times; each right-hand side is stored as an
    # array of whitespace-separated words).
    #
    # @abstract Subclass and override {#validate} to implement custom
    #   ConfigurationData classes.
    #   This class is responsible for the validation of the config.
    class ConfigData
      # Reads and type-checks the given configuration file.
      #
      # @param filename [String] a name of the configuration file
      # @param feature_types [Hash] feature type definitions,
      #   feature name => feature type
      # @param variables [Array] list of variables used in pattern features
      # @raise [ConfigurationError] if the file cannot be read or if one of
      #   its lines is malformed, names an undeclared feature or has a value
      #   that does not fit the declared type
      def initialize(filename, feature_types, variables)
        @variables = variables
        @filename = filename

        # feature_types: hash: feature_name => feature_type
        @feature_types = feature_types

        # features: hash: feature_name => value
        @features = {}

        # hash: feature_name => Proc
        # access method for list features
        @list_feature_access = {}

        # pre-initialize list features to an empty array
        @feature_types.each_pair do |feature_name, feature_type|
          @features[feature_name] = [] if feature_type == "list"
        end

        begin
          File.open(@filename, 'r') do |file|
            file.each_line do |raw_line|
              line = raw_line.strip
              # Skip empty lines and comments.
              next if line.empty? || line.start_with?("#")

              feature_name, rhs = extract_def(line)
              set_entry(feature_name, rhs)
            end
          end
        rescue ConfigurationError
          # Errors in the file's content already carry a precise message;
          # do not disguise them as "could not open".
          raise
        rescue => e
          msg = "Error: I could not open the experiment file: #{@filename}"
          raise ConfigurationError.new(msg, e)
        end
      end

      # Sets an entry, either an existing or a new one. The feature must have
      # been declared and the value must conform to its declared type.
      #
      # Note that "float" and "integer" values are converted with
      # String#to_f / String#to_i, so non-numeric text silently becomes 0.
      #
      # @param feature_name [String] name of the feature
      # @param rhs [String] right-hand side of the definition
      # @raise [ConfigurationError] for an undeclared feature, an unknown
      #   feature type, or a bool value other than 'true' / 'false'
      def set_entry(feature_name, rhs)
        feature_type = @feature_types[feature_name]

        unless feature_type
          msg = "Error in experiment file:\n"\
                "Unknown parameter #{feature_name} in #{@filename}.\n"\
                "Expected features for this type of experiment file:\n"\
                "#{@feature_types.keys.join(', ')}"
          raise ConfigurationError, msg
        end

        case feature_type
        when "pattern"
          # file format specification
          @features[feature_name] = ConfigFormatElement.new(rhs, @variables)
        when "list"
          # rhs is a string of space-separated words, stored as an array of
          # words; identical entries are stored only once
          if rhs.empty?
            LOGGER.warn "WARNING: I got an empty value for list feature #{feature_name}. "\
                        "I'll ignore it."
          else
            words = rhs.split
            entries = (@features[feature_name] ||= [])
            entries << words unless entries.include?(words)
          end
        when "bool"
          unless %w(true false).include?(rhs)
            msg = "Error in experiment file:\n"\
                  "Value for #{feature_name} must be either 'true' or 'false'.\n"\
                  "I got: #{rhs}.\n"
            raise ConfigurationError, msg
          end

          @features[feature_name] = (rhs == "true")
        when "float"
          @features[feature_name] = rhs.to_f
        when "integer"
          @features[feature_name] = rhs.to_i
        when "string"
          @features[feature_name] = rhs
        else
          raise ConfigurationError,
                "Unknown feature type for feature #{feature_name}: #{feature_type}"
        end
      end

      # Removes entries of a list feature. An entry is compared as its words
      # joined by single spaces.
      #
      # - String: each entry exactly equal to the string is removed
      # - Regexp: each entry matching the regexp is removed
      #
      # @param [String] lhs feature name
      # @param [String, Regexp] rhs righthand side
      # @return [Array<Array<String>>] the removed entries
      # @raise [ConfigurationError] if lhs is not a list feature or rhs is
      #   neither a String nor a Regexp
      def unset_list_entry(lhs, rhs)
        unless @feature_types[lhs] == "list"
          msg = "Error in experiment file.\n"\
                "Feature #{lhs} unknown or not of type list."
          raise ConfigurationError, msg
        end

        rhs_match =
          case rhs
          when String
            # \A and \z anchor the whole string, not just one line of it.
            /\A#{Regexp.escape(rhs)}\z/
          when Regexp
            rhs
          else
            raise ConfigurationError, "Shouldn't be here: #{rhs.class}."
          end

        removed, kept = @features[lhs].partition { |entry| entry.join(" ") =~ rhs_match }
        # Modify the stored array in place so that references handed out by
        # get() stay valid.
        @features[lhs].replace(kept)
        removed
      end

      # Adds the information from a second ConfigData object to this one.
      # Feature names this object already knows are kept as they are; the
      # other object's entries for them are ignored. The other object is not
      # modified.
      #
      # @param [ConfigData] config_obj A ConfigData object.
      # @raise [ConfigurationError] if config_obj is not a ConfigData
      def adjoin(config_obj)
        unless config_obj.is_a?(ConfigData)
          raise ConfigurationError, "I can only adjoin another ConfigData object"
        end

        other_features, other_feature_types, other_list_feature_access = config_obj.get_contents
        overlap = @feature_types.keys & other_feature_types.keys
        is_new = ->(feature_name, _value) { !overlap.include?(feature_name) }

        @features.update(other_features.select(&is_new))
        @feature_types.update(other_feature_types.select(&is_new))
        @list_feature_access.update(other_list_feature_access.select(&is_new))
      end

      # Returns the value of a given feature. The class of the value depends
      # on the feature type.
      #
      # @param name [String] name of the feature to access
      # @return [Object, nil] the value, or nil if the feature has not been
      #   set in the config file
      # @raise [ConfigurationError] if no feature of this name was declared
      def get(name)
        if @feature_types[name].nil?
          raise ConfigurationError, "Unknown feature: #{name}."
        end

        # may return nil if something has not been set
        @features[name]
      end

      # Returns the type of a given feature.
      #
      # @param feature_name [String] name of the feature
      # @return [String, nil] the declared type, or nil if it is undefined
      def get_type(feature_name)
        @feature_types[feature_name]
      end

      # Tells whether a value is stored for the feature. A bool feature that
      # was explicitly set to 'false' counts as defined. List features are
      # pre-initialized to an empty array and therefore always count as
      # defined.
      #
      # @param feature [String] name of the feature
      # @return [Boolean]
      # @note This method is nowhere used.
      def is_defined(feature)
        !@features[feature].nil?
      end

      # Instantiates a pattern type feature with values for its variables.
      #
      # @param [String] key Feature name.
      # @param [Hash<String, String>] var_hash variable name(string) => value(string)
      # @return whatever the stored pattern object's instantiate returns
      #   (the pattern with its variables replaced by their values)
      # @raise [ConfigurationError] if key is not a pattern feature or the
      #   pattern has not been set in the configuration file
      def instantiate(key, var_hash = {})
        unless @feature_types[key] == "pattern"
          raise ConfigurationError, "Nothing known about pattern: #{key}."
        end

        unless @features[key]
          raise ConfigurationError, "Please define pattern in configuration file: #{key}."
        end

        @features[key].instantiate(var_hash)
      end

      # Synonym for {#instantiate}.
      #
      # @param [String] key Feature name.
      # @param [Hash<String, String>] var_hash variable name(string) => value(string)
      # @note What for?
      def get_filename(key, var_hash = {})
        instantiate(key, var_hash)
      end

      # Returns all filenames in a directory that match a partially
      # instantiated pattern.
      #
      # @param [String] dir Directory name.
      # @param [String] key Name of the pattern type feature.
      # @param [Hash<String, String>] var_hash variable name(string) => value(string)
      # @return [Array<Array>] pairs [filename(string), matches(hash)] where
      #   matches is what the pattern's match method returned for that
      #   filename. The filename doesn't include the directory.
      # @raise [ConfigurationError] if key is not a pattern feature or the
      #   pattern has not been set in the configuration file
      def get_all_filenames(dir, key, var_hash = {})
        unless @feature_types[key] == "pattern"
          raise ConfigurationError, "Nothing known about file format #{key}."
        end

        pattern = @features[key]
        unless pattern
          raise ConfigurationError, "Please define pattern in configuration file: #{key}."
        end

        # array of pairs [filename(string), matches(hash)]
        filenames = []

        Dir.foreach(dir) do |filename|
          # Does the filename match the pattern of the feature "key"?
          matches = pattern.match(filename, var_hash)
          next unless matches

          LOGGER.debug "ConfigData got #{filename}."

          # Do the variable values found in this filename conform to the
          # variable values given in var_hash?
          mismatches = var_hash.keys.select { |k| matches[k] != var_hash[k] }
          if mismatches.empty?
            filenames << [filename, matches]
          else
            msg = mismatches.map do |k|
              "Mismatch for #{k}: #{matches[k]} vs. #{var_hash[k]}."
            end.join("\n")
            LOGGER.debug(msg)
          end
        end

        filenames
      end

      # Sets the access method for a list type feature.
      #
      # The access method receives the stored list (an array holding
      # rhs.split for each "feature = rhs" entry) as its first argument,
      # followed by the arguments given to {#get_lf}, which are not checked
      # here.
      #
      # @param feature_name [String] name of the feature
      # @param proc [#call] access method for the list feature
      # @raise [ConfigurationError] if feature_name is not a list feature
      def set_list_feature_access(feature_name, proc)
        unless @feature_types[feature_name] == 'list'
          raise ConfigurationError,
                "Cannot set list feature access to non-list feature #{feature_name}"
        end

        @list_feature_access[feature_name] = proc
      end

      # Accesses a list type feature through the access function set with
      # {#set_list_feature_access}.
      #
      # @param [String] feature_name The name of a list feature.
      # @param [Array] parameters for access function, collapsed into an array here
      # @return whatever the access function returns
      # @raise [ConfigurationError] if no access function has been set
      def get_lf(feature_name, *parameters)
        accessor = @list_feature_access[feature_name]
        unless accessor
          raise ConfigurationError, "I have no list feature access method for #{feature_name}."
        end

        # the list of values goes first, the caller's parameters follow
        accessor.call(@features[feature_name], *parameters)
      end

      protected

      # Splits a config file line of the form
      #   [white space] string [white space] = [white space] stuff
      # 'stuff' may include further white space, 'string' may not.
      # A line without a value after the '=' is rejected.
      #
      # @param line [String] line from config file
      # @return [Array<String>] the left-hand side and the right-hand side
      #   of the =, minus the [white space] in the places shown above
      # @raise [ConfigurationError] if the line does not have this form
      def extract_def(line)
        md = line.match(/^\s*(\w+)\s*=\s*([^\s].*)$/)
        unless md
          msg = "Error in experiment file:\n"\
                "I couldn't analyze the following line:\n"\
                "#{line}"
          raise ConfigurationError, msg
        end

        [md[1], md[2]]
      end

      # Access to the object variables (not copies), used by {#adjoin}.
      #
      # @return [Array<Hash>] features, feature types, list feature access
      def get_contents
        [@features, @feature_types, @list_feature_access]
      end

      # Validate the semantics of parameters coming from the experiment files.
      # @abstract Override this in subclasses.
      def validate
        raise NotImplementedError
      end
    end
  end
end