shalmaneser-lib 1.2.rc5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (70) hide show
  1. checksums.yaml +7 -0
  2. data/.yardopts +10 -0
  3. data/CHANGELOG.md +4 -0
  4. data/LICENSE.md +4 -0
  5. data/README.md +122 -0
  6. data/lib/configuration/config_data.rb +457 -0
  7. data/lib/configuration/config_format_element.rb +210 -0
  8. data/lib/configuration/configuration_error.rb +15 -0
  9. data/lib/configuration/external_config_data.rb +56 -0
  10. data/lib/configuration/frappe_config_data.rb +134 -0
  11. data/lib/configuration/fred_config_data.rb +199 -0
  12. data/lib/configuration/rosy_config_data.rb +126 -0
  13. data/lib/db/db_interface.rb +50 -0
  14. data/lib/db/db_mysql.rb +141 -0
  15. data/lib/db/db_sqlite.rb +280 -0
  16. data/lib/db/db_table.rb +237 -0
  17. data/lib/db/db_view.rb +416 -0
  18. data/lib/db/db_wrapper.rb +175 -0
  19. data/lib/db/select_table_and_columns.rb +10 -0
  20. data/lib/db/sql_query.rb +243 -0
  21. data/lib/definitions.rb +19 -0
  22. data/lib/eval.rb +482 -0
  23. data/lib/ext/maxent/Classify.class +0 -0
  24. data/lib/ext/maxent/Train.class +0 -0
  25. data/lib/external_systems.rb +251 -0
  26. data/lib/framenet_format/fn_corpus_aset.rb +209 -0
  27. data/lib/framenet_format/fn_corpus_xml_file.rb +120 -0
  28. data/lib/framenet_format/fn_corpus_xml_sentence.rb +299 -0
  29. data/lib/framenet_format/fn_database.rb +143 -0
  30. data/lib/framenet_format/frame_xml_file.rb +104 -0
  31. data/lib/framenet_format/frame_xml_sentence.rb +411 -0
  32. data/lib/logging.rb +25 -0
  33. data/lib/ml/classifier.rb +189 -0
  34. data/lib/ml/mallet.rb +236 -0
  35. data/lib/ml/maxent.rb +229 -0
  36. data/lib/ml/optimize.rb +195 -0
  37. data/lib/ml/timbl.rb +140 -0
  38. data/lib/monkey_patching/array.rb +82 -0
  39. data/lib/monkey_patching/enumerable_bool.rb +24 -0
  40. data/lib/monkey_patching/enumerable_distribute.rb +18 -0
  41. data/lib/monkey_patching/file.rb +131 -0
  42. data/lib/monkey_patching/subsumed.rb +24 -0
  43. data/lib/ruby_class_extensions.rb +4 -0
  44. data/lib/salsa_tiger_xml/corpus.rb +24 -0
  45. data/lib/salsa_tiger_xml/fe_node.rb +98 -0
  46. data/lib/salsa_tiger_xml/file_parts_parser.rb +214 -0
  47. data/lib/salsa_tiger_xml/frame_node.rb +145 -0
  48. data/lib/salsa_tiger_xml/graph_node.rb +347 -0
  49. data/lib/salsa_tiger_xml/reg_xml.rb +285 -0
  50. data/lib/salsa_tiger_xml/salsa_tiger_sentence.rb +596 -0
  51. data/lib/salsa_tiger_xml/salsa_tiger_sentence_graph.rb +333 -0
  52. data/lib/salsa_tiger_xml/salsa_tiger_sentence_sem.rb +438 -0
  53. data/lib/salsa_tiger_xml/salsa_tiger_xml_helper.rb +84 -0
  54. data/lib/salsa_tiger_xml/salsa_tiger_xml_node.rb +161 -0
  55. data/lib/salsa_tiger_xml/sem_node.rb +58 -0
  56. data/lib/salsa_tiger_xml/string_terminals_in_right_order.rb +192 -0
  57. data/lib/salsa_tiger_xml/syn_node.rb +169 -0
  58. data/lib/salsa_tiger_xml/tree_node.rb +59 -0
  59. data/lib/salsa_tiger_xml/ts_syn_node.rb +47 -0
  60. data/lib/salsa_tiger_xml/usp_node.rb +72 -0
  61. data/lib/salsa_tiger_xml/xml_node.rb +163 -0
  62. data/lib/shalmaneser/lib.rb +1 -0
  63. data/lib/tabular_format/fn_tab_format_file.rb +38 -0
  64. data/lib/tabular_format/fn_tab_frame.rb +67 -0
  65. data/lib/tabular_format/fn_tab_sentence.rb +169 -0
  66. data/lib/tabular_format/tab_format_file.rb +91 -0
  67. data/lib/tabular_format/tab_format_named_args.rb +184 -0
  68. data/lib/tabular_format/tab_format_sentence.rb +119 -0
  69. data/lib/value_restriction.rb +49 -0
  70. metadata +131 -0
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 925773238e9b659fbfec8c118b4a22f4f2fb909b
4
+ data.tar.gz: da6bbfc8e3b8d33cc45ec17cd8c9c39044db86e2
5
+ SHA512:
6
+ metadata.gz: abeebd6acb3c45fe07bb13e5e281b711b30c296b0d4546fad9b8d0ce11dbc4617241efffaf56066b3bc49ac8216bf6ccae6f8a3bc12ad788129f111d63e609d2
7
+ data.tar.gz: 1d1e18b0bd144fe5aaa3058fed3e6fe5e5d80648c43e6d3e175daaf97d1ce491dbf80e7aceec9be387a65d2d6dade1d6b5d7407302442446d46a7f9afb473c61
@@ -0,0 +1,10 @@
1
+ --private
2
+ --protected
3
+ --title 'SHALMANESER'
4
+ lib/**/*.rb
5
+ bin/**/*
6
+ doc/**/*.md
7
+ -
8
+ CHANGELOG.md
9
+ LICENSE.md
10
+ doc/index.md
@@ -0,0 +1,4 @@
1
+ # Versions
2
+
3
+ ## Version 1.2.0-rc1
4
+
@@ -0,0 +1,4 @@
1
+ # LICENSE
2
+
3
+ This software is written in Ruby and is released under the [GNU General Public License](http://www.gnu.org/licenses/gpl-2.0.html) (GPL v2), and the documentation under the [GNU Free Documentation License](http://www.gnu.org/licenses/old-licenses/fdl-1.2.html) (FDL v1.2).
4
+
@@ -0,0 +1,122 @@
1
+ # SHALMANESER
2
+
3
+ [RubyGems](http://rubygems.org/gems/shalmaneser) |
4
+ [Shalmanesers Project Page](http://bu.chsta.be/projects/shalmaneser/) |
5
+ [Source Code](https://github.com/arbox/shalmaneser) |
6
+ [Bug Tracker](https://github.com/arbox/shalmaneser/issues)
7
+
8
+
9
+ [![Gem Version](https://img.shields.io/gem/v/shalmaneser.svg)](https://rubygems.org/gems/shalmaneser)
10
+ [![Gem Version](https://img.shields.io/gem/v/frprep.svg)](https://rubygems.org/gems/shalmaneser-prep)
11
+ [![Gem Version](https://img.shields.io/gem/v/fred.svg)](https://rubygems.org/gems/shalmaneser-fred)
12
+ [![Gem Version](https://img.shields.io/gem/v/rosy.svg)](https://rubygems.org/gems/shalmaneser-rosy)
13
+
14
+
15
+ [![License GPL 2](http://img.shields.io/badge/License-GPL%202-green.svg)](http://www.gnu.org/licenses/gpl-2.0.txt)
16
+ [![Build Status](https://img.shields.io/travis/arbox/shalmaneser.svg?branch=1.2)](https://travis-ci.org/arbox/shalmaneser)
17
+ [![Code Climate](https://img.shields.io/codeclimate/github/arbox/shalmaneser.svg)](https://codeclimate.com/github/arbox/shalmaneser)
18
+ [![Dependency Status](https://img.shields.io/gemnasium/arbox/shalmaneser.svg)](https://gemnasium.com/arbox/shalmaneser)
19
+
20
+ [SHALMANESER](http://www.coli.uni-saarland.de/projects/salsa/shal/) is a SHALlow seMANtic parSER.
21
+
22
+ The name Shalmaneser is borrowed from John Brunner. He describes in his novel
23
+ "Stand on Zanzibar" an all knowing supercomputer baptized Shalmaneser.
24
+
25
+ Shalmaneser also has other origins like the king [Shalmaneser III](https://en.wikipedia.org/wiki/Shalmaneser_III).
26
+
27
+ > "SCANALYZER is the one single, the ONLY study of the news in depth
28
+ > that’s processed by General Technics’ famed computer Shalmaneser,
29
+ > who sees all, hears all, knows all save only that which YOU, Mr. and Mrs.
30
+ > Everywhere, wish to keep to yourselves." <br/>
31
+ > John Brunner (1968) "Stand on Zanzibar"
32
+
33
+ > But Shalmaneser is a Micryogenic® computer bathed in liquid helium and it’s cold in his vault. <br/>
34
+ > John Brunner (1968) "Stand on Zanzibar"
35
+
36
+ > “Of course not. Shalmaneser’s main task is to achieve the impossible again, a routine undertaking here at GT.” <br/>
37
+ > John Brunner (1968) "Stand on Zanzibar"
38
+
39
+ > “They programmed Shalmaneser with the formula for this stiffener, see, and…” <br/>
40
+ > John Brunner (1968) "Stand on Zanzibar"
41
+
42
+ > What am I going to do now? <br/>
43
+ > “All right, Shalmaneser!” <br/>
44
+ > John Brunner (1968) "Stand on Zanzibar"
45
+
46
+ > Shalmaneser is a Micryogenic® computer bathed in liquid helium and there’s no sign of Teresa. <br/>
47
+ > John Brunner (1968) "Stand on Zanzibar"
48
+
49
+ > Bathed in his currents of liquid helium, self-contained, immobile, vastly well informed by every mechanical sense: Shalmaneser. <br/>
50
+ > John Brunner (1968) "Stand on Zanzibar"
51
+
52
+ ## Description
53
+
54
+ Please be careful, the whole thing is under construction! For now Shalmaneser is not intended to run on Windows systems since it heavily uses system calls for external invocations.
55
+ Current versions of Shalmaneser have been tested on Linux only (other *NIX testers are welcome!).
56
+
57
+ Shalmaneser is a supervised learning toolbox for shallow semantic parsing, i.e. the automatic assignment of semantic classes and roles to text. This technique is often called [SRL](https://en.wikipedia.org/wiki/Semantic_role_labeling) (Semantic Role Labelling). The system was developed for Frame Semantics; thus we use Frame Semantics terminology and call the classes frames and the roles frame elements. However, the architecture is reasonably general, and with a certain amount of adaption, Shalmaneser should be usable for other paradigms (e.g., PropBank roles) as well. Shalmaneser caters both for end users, and for researchers.
58
+
59
+ For end users, we provide a simple end user mode which can simply apply the pre-trained classifiers
60
+ for [English](http://www.coli.uni-saarland.de/projects/salsa/shal/index.php?nav=download) (FrameNet 1.3 annotation / Collins parser)
61
+ and [German](http://www.coli.uni-saarland.de/projects/salsa/shal/index.php?nav=download) (SALSA 1.0 annotation / Sleepy parser).
62
+
63
+ We'll try to provide newer pretrained models for English, German, and possibly other languages as soon as possible.
64
+
65
+ For researchers interested in investigating shallow semantic parsing, our system is extensively configurable and extendable.
66
+
67
+ ## Origin
68
+
69
+ The original version of Shalmaneser was written by Sebastian Padó, Katrin Erk, Alexander Koller, Ines Rehbein, Aljoscha Burchardt and others during their work in the SALSA Project.
70
+
71
+ You can find original versions of Shalmaneser up to ``1.1`` on the [SALSA](http://www.coli.uni-saarland.de/projects/salsa/shal/) project page.
72
+
73
+ ## Publications on Shalmaneser
74
+
75
+ - K. Erk and S. Padó: Shalmaneser - a flexible toolbox for semantic role assignment. Proceedings of LREC 2006, Genoa, Italy. [Click here for details](http://www.nlpado.de/~sebastian/pub/papers/lrec06_erk.pdf).
76
+
77
+ - TODO: add other works
78
+
79
+ ## Documentation
80
+
81
+ The project documentation can be found in our [doc](https://github.com/arbox/shalmaneser/blob/master/doc/index.md) folder.
82
+
83
+ ## Development
84
+
85
+ We are working now only on the `master` branch. For different intermediate versions see corresponding tags.
86
+
87
+ ## Installation
88
+
89
+ See the installation instructions in the [doc](https://github.com/arbox/shalmaneser/blob/master/doc/index.md#installation) folder.
90
+
91
+ ### Tokenizers
92
+
93
+ - [Ucto](http://ilk.uvt.nl/ucto/)
94
+
95
+ ### POS Taggers
96
+
97
+ - [TreeTagger](http://www.cis.uni-muenchen.de/~schmid/tools/TreeTagger/)
98
+
99
+ ### Lemmatizers
100
+
101
+ - [TreeTagger](http://www.cis.uni-muenchen.de/~schmid/tools/TreeTagger/)
102
+
103
+ ### Parsers
104
+
105
+ - [BerkeleyParser](https://github.com/slavpetrov/berkeleyparser)
106
+ - [Stanford Parser](http://nlp.stanford.edu/software/lex-parser.shtml)
107
+ - [Collins Parser](http://www.cs.columbia.edu/~mcollins/code.html)
108
+
109
+ ### Machine Learning Systems
110
+
111
+ - [OpenNLP MaxEnt](http://sourceforge.net/projects/maxent/files/Maxent/2.4.0/)
112
+ - [Mallet](http://mallet.cs.umass.edu/index.php)
113
+
114
+ ## License
115
+
116
+ Shalmaneser is released under the `GPL v. 2.0` license, as chosen by the original authors.
117
+
118
+ For a local copy of the full license text see the [LICENSE](LICENSE.md) file.
119
+
120
+ ## Contributing
121
+
122
+ Feel free to contact me via Github. Open an issue if you see problems or need help.
@@ -0,0 +1,457 @@
1
+ # class ConfigData:
2
+ #
3
+ # reads config data file,
4
+ # matches it against feature declarations given in its new() method,
5
+ # offers access methods for different kinds of features
6
+ #
7
+ # In the config file, all feature specifications have the form
8
+ #
9
+ # feature_name = feature_value
10
+ #
11
+ # where feature_name is a string without spaces. feature_value
12
+ # may include spaces, depending on the feature type (see below).
13
+ #
14
+ # To include a comment in a config file, start the comment line with
15
+ # '#'.
16
+ #
17
+ # Features are typed. The following types are supported:
18
+ #
19
+ # - normal types:
20
+ # "bool", "float", "integer", "string"
21
+ # For the get() function with which features in the ConfigData object
22
+ # are accessed, the values are transformed from the strings in the
23
+ # config file to the appropriate class: Boolean, Float, Integer, String
24
+ #
25
+ # - other types:
26
+ # pattern: This is a feature that may include variables in
27
+ # <> brackets. When this feature is accessed,
28
+ # values for these variables are given, i.e. this
29
+ # pattern has to be instantiated.
30
+ # For example, given a feature
31
+ #
32
+ # fileformat = features.<type>.train
33
+ #
34
+ # and method call
35
+ # instantiate("fileformat", "type" => "path")
36
+ #
37
+ # what is returned is a string "features.path.train"
38
+ #
39
+ # Variables used in a pattern have to be declared to
40
+ # the new() method.
41
+ #
42
+ # list: This is the only feature type where more than one
43
+ # feature specification with the same feature_name is allowed.
44
+ # The right-hand sides of a list feature are stored in an array.
45
+ #
46
+ # Given a 'list' feature 'bla', if the config file contains
47
+ #
48
+ # bla = blupp 1 2
49
+ # bla = la di da
50
+ #
51
+ # the list feature 'bla' is represented as follows:
52
+ # @features['bla'] = [['blupp', 1,2], ['la', 'di', 'da']]
53
+ #
54
+ # For comfortable access to a list feature, arbitrary
55
+ # access functions for list features can be defined.
56
+ #
57
+ #
58
+
59
+ require_relative 'config_format_element'
60
+ require_relative 'configuration_error'
61
+ require 'ruby_class_extensions'
62
+ require 'logging'
63
+
64
+ #####################################################
65
+ ####################################################
66
+ # ConfigData is the main class in this package.
67
+ # It manages config files.
68
+ #
69
+ # To use it, inherit from it and just make a new new() method
70
+ # that only takes as input the name of the config file
71
+ # and that declares all the feature types and variable names
72
+ # needed for the given application.
73
+ #
74
+ # @abstract Subclass and override {#initialize} to implement
75
+ # a custom ConfigData class.
76
module Shalmaneser
  module Configuration
    # Reads a configuration ("experiment") file and checks every entry
    # against a table of declared feature types.
    #
    # Each non-empty, non-comment line of the file has the form
    #
    #   feature_name = feature_value
    #
    # Supported feature types are "bool", "float", "integer", "string",
    # "pattern" (stored as a ConfigFormatElement) and "list" (several
    # lines with the same name are collected into an array of word arrays).
    #
    # @abstract Subclass and override {#validate} to implement custom
    #   ConfigurationData classes.
    class ConfigData
      # Parses the configuration file and stores all declared features.
      #
      # @param filename [String] a name of the configuration file
      # @param feature_types [Hash] feature type definitions,
      #   feature_name => feature_type
      # @param variables [Array] list of variables used in pattern features
      # @raise [ConfigurationError] if the file cannot be read or if one of
      #   its lines is malformed, names an undeclared feature, or carries an
      #   invalid value
      def initialize(filename, feature_types, variables)
        @variables = variables
        @filename = filename

        # hash: feature_name => feature_type
        @feature_types = feature_types

        # hash: feature_name => value
        @features = {}

        # hash: feature_name => Proc, access method for list features
        @list_feature_access = {}

        # List features always exist, starting out as an empty array.
        @feature_types.each_pair do |feature_name, feature_type|
          @features[feature_name] = [] if feature_type == "list"
        end

        begin
          File.open(@filename, 'r') do |file|
            while (line = file.gets)
              line = line.strip
              # Skip empty lines and comments.
              next if line.empty? || line.start_with?('#')

              feature_name, rhs = extract_def(line)
              set_entry(feature_name, rhs)
            end
          end
        rescue ConfigurationError
          # A parse/validation problem already carries a precise message;
          # do not disguise it as a "could not open" error.
          raise
        rescue => e
          msg = "Error: I could not open the experiment file: #{@filename}"
          raise ConfigurationError.new(msg, e)
        end
      end

      # Sets an entry, either an existing or a new one. The feature must
      # have been declared in the feature type table given to the constructor.
      #
      # @param feature_name [String] name of the feature
      # @param rhs [String] right-hand side as written in the config file
      # @raise [ConfigurationError] for undeclared features, unknown feature
      #   types, and bool values other than 'true'/'false'
      def set_entry(feature_name, rhs)
        feature_type = @feature_types[feature_name]

        unless feature_type
          msg = "Error in experiment file:\n"\
                "Unknown parameter #{feature_name} in #{@filename}.\n"\
                "Expected features for this type of experiment file:\n"\
                "#{@feature_types.keys.join(', ')}"
          raise ConfigurationError, msg
        end

        case feature_type
        when "pattern"
          # file format specification
          @features[feature_name] = ConfigFormatElement.new(rhs, @variables)
        when "list"
          # rhs is a string of space-separated words; it is stored as an
          # array of words, and duplicate entries are stored only once.
          if rhs.empty?
            LOGGER.warn "WARNING: I got an empty value for list feature #{feature_name}. "\
                        "I'll ignore it."
          else
            words = rhs.split
            @features[feature_name] << words unless @features[feature_name].include?(words)
          end
        when "bool"
          unless %w(true false).include?(rhs)
            msg = "Error in experiment file:\n"\
                  "Value for #{feature_name} must be either 'true' or 'false'.\n"\
                  "I got: #{rhs}.\n"
            raise ConfigurationError, msg
          end

          @features[feature_name] = (rhs == "true")
        when "float"
          @features[feature_name] = rhs.to_f
        when "integer"
          @features[feature_name] = rhs.to_i
        when "string"
          @features[feature_name] = rhs
        else
          raise ConfigurationError,
                "Unknown feature type for feature #{feature_name}: #{feature_type}"
        end
      end

      # Removes entries from a list feature.
      # - String: each entry exactly matching the string is removed.
      # - Regexp: each entry matching the regexp is removed.
      # Entries are compared in their space-joined form.
      #
      # @param [String] lhs feature name
      # @param [String, Regexp] rhs righthand side
      # @return [Array] the entries that were removed
      # @raise [ConfigurationError] if lhs is not a list feature or rhs is
      #   neither a String nor a Regexp
      def unset_list_entry(lhs, rhs)
        unless @feature_types[lhs] == "list"
          msg = "Error in experiment file.\n"\
                "Feature #{lhs} unknown or not of type list."
          raise ConfigurationError, msg
        end

        rhs_match =
          case rhs
          when String
            Regexp.new("^" + Regexp.escape(rhs) + "$")
          when Regexp
            rhs
          else
            raise ConfigurationError, "Shouldn't be here: #{rhs.class}."
          end

        # Collect first, delete afterwards: the list is not mutated while
        # it is being iterated.
        to_delete = @features[lhs].select { |entry| entry.join(" ") =~ rhs_match }
        to_delete.each { |entry| @features[lhs].delete(entry) }
      end

      # Adds the information from a second ConfigData object to this one.
      # Feature names that this object already declares are ignored.
      #
      # @param [ConfigData] config_obj A ConfigData object.
      # @raise [ConfigurationError] if config_obj is not a ConfigData
      def adjoin(config_obj)
        unless config_obj.is_a?(ConfigData)
          raise ConfigurationError, "I can only adjoin another ConfigData object"
        end

        other_features, other_feature_types, other_list_feature_access = config_obj.get_contents

        # Work on filtered copies so the other object is left untouched.
        already_mine = @feature_types.keys & other_feature_types.keys
        unless already_mine.empty?
          other_features = other_features.reject { |name, _| already_mine.include?(name) }
          other_feature_types = other_feature_types.reject { |name, _| already_mine.include?(name) }
          other_list_feature_access =
            other_list_feature_access.reject { |name, _| already_mine.include?(name) }
        end

        @features.update(other_features)
        @feature_types.update(other_feature_types)
        @list_feature_access.update(other_list_feature_access)
      end

      # Returns the value of a given feature.
      #
      # @param name [String] name of the feature to access
      # @return [Object, nil] the feature value (its class depends on the
      #   feature type), or nil if the feature has not been set
      # @raise [ConfigurationError] if no feature of this name was declared
      def get(name)
        raise ConfigurationError, "Unknown feature: #{name}." if @feature_types[name].nil?

        # may be nil if the feature has not been set
        @features[name]
      end

      # Returns the declared type of a given feature.
      #
      # @param feature_name [String] name of the feature
      # @return [String, nil] the feature type, or nil if it is undeclared
      def get_type(feature_name)
        @feature_types[feature_name]
      end

      # Tells whether a feature currently has a value.
      #
      # A bool feature set to `false` counts as defined. List features are
      # pre-initialized to an empty array and therefore always count as
      # defined.
      #
      # @param feature [String] name of the feature
      # @return [Boolean]
      # @note This method is nowhere used.
      def is_defined(feature)
        !@features[feature].nil?
      end

      # Instantiates a pattern type feature with the given variable values.
      #
      # @param [String] key Feature name.
      # @param [Hash<String, String>] var_hash variable name(string) => value(string)
      # @return whatever the stored pattern's instantiate method returns
      # @raise [ConfigurationError] if key is not a pattern feature or the
      #   pattern has not been set
      def instantiate(key, var_hash = {})
        unless @feature_types[key] == "pattern"
          raise ConfigurationError, "Nothing known about pattern: #{key}."
        end

        unless @features[key]
          raise ConfigurationError, "Please define pattern in configuration file: #{key}."
        end

        @features[key].instantiate(var_hash)
      end

      # Synonym for {#instantiate}.
      #
      # @param [String] key Feature name.
      # @param [Hash<String, String>] var_hash variable name(string) => value(string)
      # @note What for?
      def get_filename(key, var_hash = {})
        instantiate(key, var_hash)
      end

      # Returns all entries of a directory that match a partially
      # instantiated pattern feature.
      #
      # @param [String] dir Directory name.
      # @param [String] key Name of the pattern type feature.
      # @param [Hash<String, String>] var_hash variable name(string) => value(string)
      # @return [Array<Array>] pairs [filename, matches], where matches is
      #   what the pattern's match method returned for that filename.
      #   The filename doesn't include the directory.
      # @raise [ConfigurationError] if key is not a pattern feature or the
      #   pattern has not been set
      def get_all_filenames(dir, key, var_hash = {})
        unless @feature_types[key] == "pattern"
          raise ConfigurationError, "Nothing known about file format #{key}."
        end

        # Same guard as in #instantiate: a declared but unset pattern is nil.
        pattern = @features[key]
        unless pattern
          raise ConfigurationError, "Please define pattern in configuration file: #{key}."
        end

        # array of pairs [filename(string), matches(hash)]
        filenames = []

        Dir.foreach(dir) do |filename|
          matches = pattern.match(filename, var_hash)
          next unless matches

          LOGGER.debug "ConfigData got #{filename}."

          # Do the variable values found in this filename conform to the
          # variable values requested in var_hash?
          mismatches = var_hash.keys.select { |k| matches[k] != var_hash[k] }
          if mismatches.empty?
            filenames << [filename, matches]
          else
            # map, not each: each would return the keys themselves and
            # throw the formatted messages away.
            msg = mismatches.map do |k|
              "Mismatch for #{k}: #{matches[k]} vs. #{var_hash[k]}."
            end.join("\n")
            LOGGER.debug(msg)
          end
        end

        filenames
      end

      # Registers the access method to be used for a list type feature.
      #
      # The access method receives the stored list (an array of word arrays,
      # one per config file entry) as its first argument; any further
      # arguments are passed through from {#get_lf} unchecked.
      #
      # @param feature_name [String] name of the feature
      # @param proc [Proc] access method for the list feature
      # @raise [ConfigurationError] if the feature is not of type list
      def set_list_feature_access(feature_name, proc)
        unless @feature_types[feature_name] == 'list'
          fail ConfigurationError,
               "Cannot set list feature access to non-list feature #{feature_name}"
        end

        @list_feature_access[feature_name] = proc
      end

      # Accesses a list type feature through the access method registered
      # with {#set_list_feature_access}.
      #
      # @param [String] feature_name The name of a list feature.
      # @param [Array] parameters for access function, collapsed into an array here
      # @return whatever the access function returns
      # @raise [ConfigurationError] if no access method has been registered
      def get_lf(feature_name, *parameters)
        accessor = @list_feature_access[feature_name]
        unless accessor
          raise ConfigurationError, "I have no list feature access method for #{feature_name}."
        end

        # The stored list goes first, followed by the re-exploded parameters.
        accessor.call(@features[feature_name], *parameters)
      end

      protected

      # Splits a config file line of the form
      #   [white space] string [white space] = [white space] stuff
      # 'stuff' may include further white space, 'string' may not.
      #
      # @param line [String] line from config file
      # @return [Array<String>] the left-hand and right-hand side of the '='
      # @raise [ConfigurationError] if the line does not have this structure
      def extract_def(line)
        md = line.match(/^\s*(\w+)\s*=\s*([^\s].*)$/)
        unless md
          msg = "Error in experiment file:\n"\
                "I couldn't analyze the following line:\n"\
                "#{line}"
          raise ConfigurationError, msg
        end

        [md[1], md[2]]
      end

      # Access to the object variables, used by {#adjoin}.
      #
      # @return [Array] features, feature types and list feature accessors
      def get_contents
        [@features, @feature_types, @list_feature_access]
      end

      # Validate the semantics of parameters coming from the experiment files.
      # @abstract Override this in subclasses.
      def validate
        raise NotImplementedError
      end
    end
  end
end