frprep 0.0.1.prealpha
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.yardopts +8 -0
- data/CHANGELOG.rdoc +0 -0
- data/LICENSE.rdoc +0 -0
- data/README.rdoc +0 -0
- data/lib/common/AbstractSynInterface.rb +1227 -0
- data/lib/common/BerkeleyInterface.rb +375 -0
- data/lib/common/CollinsInterface.rb +1165 -0
- data/lib/common/ConfigData.rb +694 -0
- data/lib/common/Counter.rb +18 -0
- data/lib/common/DBInterface.rb +48 -0
- data/lib/common/EnduserMode.rb +27 -0
- data/lib/common/Eval.rb +480 -0
- data/lib/common/FixSynSemMapping.rb +196 -0
- data/lib/common/FrPrepConfigData.rb +66 -0
- data/lib/common/FrprepHelper.rb +1324 -0
- data/lib/common/Graph.rb +345 -0
- data/lib/common/ISO-8859-1.rb +24 -0
- data/lib/common/ML.rb +186 -0
- data/lib/common/Maxent.rb +215 -0
- data/lib/common/MiniparInterface.rb +1388 -0
- data/lib/common/Optimise.rb +195 -0
- data/lib/common/Parser.rb +213 -0
- data/lib/common/RegXML.rb +269 -0
- data/lib/common/RosyConventions.rb +171 -0
- data/lib/common/SQLQuery.rb +243 -0
- data/lib/common/STXmlTerminalOrder.rb +194 -0
- data/lib/common/SalsaTigerRegXML.rb +2347 -0
- data/lib/common/SalsaTigerXMLHelper.rb +99 -0
- data/lib/common/SleepyInterface.rb +384 -0
- data/lib/common/SynInterfaces.rb +275 -0
- data/lib/common/TabFormat.rb +720 -0
- data/lib/common/Tiger.rb +1448 -0
- data/lib/common/TntInterface.rb +44 -0
- data/lib/common/Tree.rb +61 -0
- data/lib/common/TreetaggerInterface.rb +303 -0
- data/lib/common/headz.rb +338 -0
- data/lib/common/option_parser.rb +13 -0
- data/lib/common/ruby_class_extensions.rb +310 -0
- data/lib/fred/Baseline.rb +150 -0
- data/lib/fred/FileZipped.rb +31 -0
- data/lib/fred/FredBOWContext.rb +863 -0
- data/lib/fred/FredConfigData.rb +182 -0
- data/lib/fred/FredConventions.rb +232 -0
- data/lib/fred/FredDetermineTargets.rb +324 -0
- data/lib/fred/FredEval.rb +312 -0
- data/lib/fred/FredFeatureExtractors.rb +321 -0
- data/lib/fred/FredFeatures.rb +1061 -0
- data/lib/fred/FredFeaturize.rb +596 -0
- data/lib/fred/FredNumTrainingSenses.rb +27 -0
- data/lib/fred/FredParameters.rb +402 -0
- data/lib/fred/FredSplit.rb +84 -0
- data/lib/fred/FredSplitPkg.rb +180 -0
- data/lib/fred/FredTest.rb +607 -0
- data/lib/fred/FredTrain.rb +144 -0
- data/lib/fred/PlotAndREval.rb +480 -0
- data/lib/fred/fred.rb +45 -0
- data/lib/fred/md5.rb +23 -0
- data/lib/fred/opt_parser.rb +250 -0
- data/lib/frprep/AbstractSynInterface.rb +1227 -0
- data/lib/frprep/Ampersand.rb +37 -0
- data/lib/frprep/BerkeleyInterface.rb +375 -0
- data/lib/frprep/CollinsInterface.rb +1165 -0
- data/lib/frprep/ConfigData.rb +694 -0
- data/lib/frprep/Counter.rb +18 -0
- data/lib/frprep/FNCorpusXML.rb +643 -0
- data/lib/frprep/FNDatabase.rb +144 -0
- data/lib/frprep/FixSynSemMapping.rb +196 -0
- data/lib/frprep/FrPrepConfigData.rb +66 -0
- data/lib/frprep/FrameXML.rb +513 -0
- data/lib/frprep/FrprepHelper.rb +1324 -0
- data/lib/frprep/Graph.rb +345 -0
- data/lib/frprep/ISO-8859-1.rb +24 -0
- data/lib/frprep/MiniparInterface.rb +1388 -0
- data/lib/frprep/Parser.rb +213 -0
- data/lib/frprep/RegXML.rb +269 -0
- data/lib/frprep/STXmlTerminalOrder.rb +194 -0
- data/lib/frprep/SalsaTigerRegXML.rb +2347 -0
- data/lib/frprep/SalsaTigerXMLHelper.rb +99 -0
- data/lib/frprep/SleepyInterface.rb +384 -0
- data/lib/frprep/SynInterfaces.rb +275 -0
- data/lib/frprep/TabFormat.rb +720 -0
- data/lib/frprep/Tiger.rb +1448 -0
- data/lib/frprep/TntInterface.rb +44 -0
- data/lib/frprep/Tree.rb +61 -0
- data/lib/frprep/TreetaggerInterface.rb +303 -0
- data/lib/frprep/do_parses.rb +142 -0
- data/lib/frprep/frprep.rb +686 -0
- data/lib/frprep/headz.rb +338 -0
- data/lib/frprep/one_parsed_file.rb +28 -0
- data/lib/frprep/opt_parser.rb +94 -0
- data/lib/frprep/ruby_class_extensions.rb +310 -0
- data/lib/rosy/AbstractFeatureAndExternal.rb +240 -0
- data/lib/rosy/DBMySQL.rb +146 -0
- data/lib/rosy/DBSQLite.rb +280 -0
- data/lib/rosy/DBTable.rb +239 -0
- data/lib/rosy/DBWrapper.rb +176 -0
- data/lib/rosy/ExternalConfigData.rb +58 -0
- data/lib/rosy/FailedParses.rb +130 -0
- data/lib/rosy/FeatureInfo.rb +242 -0
- data/lib/rosy/GfInduce.rb +1115 -0
- data/lib/rosy/GfInduceFeature.rb +148 -0
- data/lib/rosy/InputData.rb +294 -0
- data/lib/rosy/RosyConfigData.rb +115 -0
- data/lib/rosy/RosyConfusability.rb +338 -0
- data/lib/rosy/RosyEval.rb +465 -0
- data/lib/rosy/RosyFeatureExtractors.rb +1609 -0
- data/lib/rosy/RosyFeaturize.rb +280 -0
- data/lib/rosy/RosyInspect.rb +336 -0
- data/lib/rosy/RosyIterator.rb +477 -0
- data/lib/rosy/RosyPhase2FeatureExtractors.rb +230 -0
- data/lib/rosy/RosyPruning.rb +165 -0
- data/lib/rosy/RosyServices.rb +744 -0
- data/lib/rosy/RosySplit.rb +232 -0
- data/lib/rosy/RosyTask.rb +19 -0
- data/lib/rosy/RosyTest.rb +826 -0
- data/lib/rosy/RosyTrain.rb +232 -0
- data/lib/rosy/RosyTrainingTestTable.rb +786 -0
- data/lib/rosy/TargetsMostFrequentFrame.rb +60 -0
- data/lib/rosy/View.rb +418 -0
- data/lib/rosy/opt_parser.rb +379 -0
- data/lib/rosy/rosy.rb +77 -0
- data/lib/shalmaneser/version.rb +3 -0
- data/test/frprep/test_opt_parser.rb +94 -0
- data/test/functional/functional_test_helper.rb +40 -0
- data/test/functional/sample_experiment_files/fred_test.salsa.erb +122 -0
- data/test/functional/sample_experiment_files/fred_train.salsa.erb +135 -0
- data/test/functional/sample_experiment_files/prp_test.salsa.erb +138 -0
- data/test/functional/sample_experiment_files/prp_test.salsa.fred.standalone.erb +120 -0
- data/test/functional/sample_experiment_files/prp_test.salsa.rosy.standalone.erb +120 -0
- data/test/functional/sample_experiment_files/prp_train.salsa.erb +138 -0
- data/test/functional/sample_experiment_files/prp_train.salsa.fred.standalone.erb +138 -0
- data/test/functional/sample_experiment_files/prp_train.salsa.rosy.standalone.erb +138 -0
- data/test/functional/sample_experiment_files/rosy_test.salsa.erb +257 -0
- data/test/functional/sample_experiment_files/rosy_train.salsa.erb +259 -0
- data/test/functional/test_fred.rb +47 -0
- data/test/functional/test_frprep.rb +52 -0
- data/test/functional/test_rosy.rb +20 -0
- metadata +270 -0
@@ -0,0 +1,694 @@
|
|
1
|
+
# class ConfigData:
|
2
|
+
#
|
3
|
+
# reads config data file,
|
4
|
+
# matches it against feature declarations given in its new() method,
|
5
|
+
# offers access methods for different kinds of features
|
6
|
+
#
|
7
|
+
# In the config file, all feature specifications have the form
|
8
|
+
#
|
9
|
+
# feature_name = feature_value
|
10
|
+
#
|
11
|
+
# where feature_name is a string without spaces. feature_value
|
12
|
+
# may include spaces, depending on the feature type (see below).
|
13
|
+
#
|
14
|
+
# To include a comment in a config file, start the comment line with
|
15
|
+
# '#'.
|
16
|
+
#
|
17
|
+
# Features are typed. The following types are supported:
|
18
|
+
#
|
19
|
+
# - normal types:
|
20
|
+
# "bool", "float", "integer", "string"
|
21
|
+
# For the get() function with which features in the ConfigData object
|
22
|
+
# are accessed, the values are transformed from the strings in the
|
23
|
+
# config file to the appropriate class: Boolean, Float, Integer, String
|
24
|
+
#
|
25
|
+
# - other types:
|
26
|
+
# pattern: This is a feature that may include variables in
|
27
|
+
# <> brackets. When this feature is accessed,
|
28
|
+
# values for these variables are given, i.e. this
|
29
|
+
# pattern has to be instantiated.
|
30
|
+
# For example, given a feature
|
31
|
+
#
|
32
|
+
# fileformat = features.<type>.train
|
33
|
+
#
|
34
|
+
# and method call
|
35
|
+
# instantiate("fileformat", "type" => "path")
|
36
|
+
#
|
37
|
+
# what is returned is a string "features.path.train"
|
38
|
+
#
|
39
|
+
# Variables used in a pattern have to be declared to
|
40
|
+
# the new() method.
|
41
|
+
#
|
42
|
+
# list: This is the only feature type where more than one
|
43
|
+
# feature specification with the same feature_name is allowed.
|
44
|
+
# The right-hand sides of a list feature are stored in an array.
|
45
|
+
#
|
46
|
+
# Given a 'list' feature 'bla', if the config file contains
|
47
|
+
#
|
48
|
+
# bla = blupp 1 2
|
49
|
+
# bla = la di da
|
50
|
+
#
|
51
|
+
# the list feature 'bla' is represented as follows:
|
52
|
+
# @features['bla'] = [['blupp', '1', '2'], ['la', 'di', 'da']]
|
53
|
+
#
|
54
|
+
# For comfortable access to a list feature, arbitrary
|
55
|
+
# access functions for list features can be defined.
|
56
|
+
#
|
57
|
+
#
|
58
|
+
|
59
|
+
require 'frprep/ruby_class_extensions'
|
60
|
+
|
61
|
+
|
62
|
+
#####################################################
|
63
|
+
####################################################
|
64
|
+
# ConfigData is the main class in this package.
|
65
|
+
# It manages config files.
|
66
|
+
#
|
67
|
+
# To use it, inherit from it and just make a new new() method
|
68
|
+
# that only takes as input the name of the config file
|
69
|
+
# and that declares all the feature types and variable names
|
70
|
+
# needed for the given application.
|
71
|
+
|
72
|
+
class ConfigData

  ###########
  # new()
  #
  # Reads the config file and stores each entry according to the type
  # declared for its feature name.
  #
  # filename:      string, name of the config file
  # feature_types: hash, feature_name => feature_type, where the type is one of
  #                "bool", "float", "integer", "string", "pattern", "list"
  # variables:     array of strings, the variables that may occur in
  #                pattern type features
  #
  # Prints a message to stderr and exits with status 1 if the file cannot
  # be opened, if a line cannot be analyzed, or if a line names a feature
  # that has not been declared in feature_types.
  def initialize(filename, feature_types, variables)
    @test_print = false
    @variables = variables
    @original_filename = filename

    # open config file; only the open itself is guarded, so that errors
    # raised while interpreting entries are not reported as "could not open"
    begin
      file = File.new(filename)
    rescue
      $stderr.puts "Error: I could not open the experiment file #{filename}"
      exit 1
    end

    # @feature_types: hash feature_name => feature_type
    # @features:      hash feature_name => value
    @feature_types = feature_types
    @features = {}

    # @list_feature_access: hash feature_name => Proc,
    # access method for list features
    @list_feature_access = {}

    # pre-initialize list features to an empty array
    @feature_types.each_pair do |feature_name, feature_type|
      @features[feature_name] = [] if feature_type == "list"
    end

    # examine the config file contents, skipping comments and empty lines;
    # the file handle is closed even if an entry is rejected
    begin
      file.each_line do |raw_line|
        line = raw_line.strip
        next if line.empty? || line.start_with?("#")

        feature_name, rhs = extract_def(line)
        set_entry(feature_name, rhs)
      end
    ensure
      file.close
    end
  end

  #####
  # set_entry
  #
  # Sets an entry, either an existing or a new one. The feature name must
  # have been declared to new(); otherwise an error is printed and the
  # program exits with status 1.
  #
  # feature_name: string, name of the feature
  # rhs:          string, right-hand side as written in the config file
  #
  # Values are converted according to the declared type:
  # - "pattern": stored as a ConfigFormatElement
  # - "list":    rhs is split at whitespace and the resulting array of
  #              words is appended, unless an identical entry exists
  # - "bool":    must be 'true' or 'false' (else exit 1)
  # - "float", "integer": converted with to_f / to_i
  # - "string":  stored as is
  #
  # Raises a RuntimeError if the declared type is none of the above.
  def set_entry(feature_name, rhs)
    feature_type = @feature_types[feature_name]

    unless feature_type
      $stderr.puts "Error in experiment file:"
      $stderr.puts "Unknown parameter #{feature_name} in #{@original_filename}."
      $stderr.puts "Expected features for this type of experiment file:"
      $stderr.puts @feature_types.keys.join(", ")
      exit 1
    end

    case feature_type
    when "pattern"
      # file format specification
      @features[feature_name] = ConfigFormatElement.new(rhs, @variables)

    when "list"
      if rhs.empty?
        $stderr.puts "WARNING: I got an empty value for list feature #{feature_name}."
        $stderr.puts "I'll ignore it."
      else
        # rhs is a string of space-separated words, stored as an array of words
        words = rhs.split
        entries = @features[feature_name]
        entries << words unless entries.include?(words)
      end

    when "bool"
      unless ["true", "false"].include? rhs
        $stderr.puts "Error in experiment file:"
        $stderr.puts "Value for #{feature_name} must be either 'true' or 'false'."
        $stderr.puts "I got: " + rhs.to_s
        exit 1
      end
      @features[feature_name] = (rhs == "true")

    when "float"
      @features[feature_name] = rhs.to_f

    when "integer"
      @features[feature_name] = rhs.to_i

    when "string"
      @features[feature_name] = rhs

    else
      raise "Unknown feature type for feature #{feature_name}: #{feature_type}"
    end
  end

  ####
  # unset_list_entry
  #
  # Removes entries from a list feature.
  #
  # lhs: string, name of the list feature
  # rhs: string or regexp, compared against each entry joined by single spaces
  #      - string: each entry exactly equal to the string is removed
  #      - regexp: each entry matching the regexp is removed
  #
  # returns: array of the removed entries
  #
  # Exits with status 1 if lhs is unknown or not of type list;
  # raises a RuntimeError if rhs is neither a String nor a Regexp.
  def unset_list_entry(lhs, rhs)
    unless @feature_types[lhs] == "list"
      $stderr.puts "Error in experiment file: "
      $stderr.puts "Feature #{lhs} unknown or not of type list."
      exit 1
    end

    # dispatch on the object itself so that subclasses are accepted too
    doomed =
      case rhs
      when String
        lambda { |text| text == rhs }
      when Regexp
        lambda { |text| text =~ rhs }
      else
        raise "Shouldn't be here: " + rhs.class.to_s
      end

    removed, kept = @features[lhs].partition { |entry| doomed.call(entry.join(" ")) }
    # modify the stored array in place
    @features[lhs].replace(kept)
    removed
  end

  #####
  # adjoin
  #
  # Adds the information from a second ConfigData object to this one.
  # Feature names that this object already knows are ignored, i.e.
  # the values of this object win.
  #
  # config_obj: ConfigData object
  #
  # Raises a RuntimeError if config_obj is not a ConfigData object.
  def adjoin(config_obj)
    unless config_obj.kind_of? ConfigData
      raise "I can only adjoin another ConfigData object"
    end

    other_features, other_feature_types, other_list_feature_access = config_obj.get_contents

    # feature names declared in both objects; computed before any update.
    # reject() builds new hashes, so the other object is left untouched.
    overlap = @feature_types.keys & other_feature_types.keys
    is_new = lambda { |name| !overlap.include?(name) }

    @features.update(other_features.select { |name, _value| is_new.call(name) })
    @feature_types.update(other_feature_types.select { |name, _type| is_new.call(name) })
    @list_feature_access.update(other_list_feature_access.select { |name, _proc| is_new.call(name) })
  end

  #####
  # get()
  #
  # name: string, name of the feature to access
  #
  # returns: the value of the feature; its class depends on the feature type.
  #   nil if the feature has not been set in the config file.
  #
  # Raises a RuntimeError if no feature of this name has been declared.
  def get(name)
    raise "Unknown feature #{name}" if @feature_types[name].nil?

    # may be nil if the feature has not been set
    @features[name]
  end

  ####
  # get_type
  #
  # returns: the declared type of the given feature,
  #   or nil if the feature is undeclared
  def get_type(feature_name)
    @feature_types[feature_name]
  end

  #####
  # is_defined
  #
  # feature: string, name of the feature
  #
  # returns: true if a value is stored for this feature, false else.
  #   A bool feature set to 'false' counts as defined.
  #   List features are pre-initialized to an empty array by new(),
  #   so they count as defined as well.
  def is_defined(feature)
    !@features[feature].nil?
  end

  #####
  # instantiate
  #
  # Given a pattern type feature and a hash mapping the variables
  # occurring in the pattern to values, instantiates the pattern.
  #
  # key:      string, feature name
  # var_hash: hash, variable name(string) => value(string)
  #
  # returns: string, the pattern with all variables replaced by their values
  #
  # Raises a RuntimeError if the feature is not of type pattern
  # or has not been set in the config file.
  def instantiate(key, var_hash={})
    unless @feature_types[key] == "pattern"
      raise "Nothing known about pattern #{key}"
    end
    unless @features[key]
      raise "Please define pattern in configuration file: #{key}"
    end

    @features[key].instantiate(var_hash)
  end

  #####
  # get_filename:
  #
  # synonym for instantiate()
  def get_filename(key, var_hash={})
    instantiate(key, var_hash)
  end

  #####
  # set_test_print
  #
  # Switches test output on (true) or off (false).
  # Raises a RuntimeError for any other argument.
  def set_test_print(tf)
    raise "Shouldn't be here" unless [true, false].include? tf

    @test_print = tf
  end

  #####
  # get_all_filenames
  #
  # Given a directory, a pattern type feature, and a hash mapping some
  # of the pattern's variables to values, finds all filenames in the
  # directory that match the partially instantiated pattern.
  #
  # dir:      string, directory name
  # key:      string, name of pattern type feature
  # var_hash: hash, variable name(string) => value(string)
  #
  # returns: an array of pairs [filename(string), matches(hash)]
  #   where the matches hash maps the variables of the pattern to
  #   their values as instantiated in the given filename.
  #   The filename doesn't include the directory.
  #
  # Raises a RuntimeError if the feature is not of type pattern
  # or has not been set in the config file.
  def get_all_filenames(dir, key, var_hash={})
    unless @feature_types[key] == "pattern"
      raise "Nothing known about file format #{key}"
    end
    # same check as in instantiate(): a declared but unset pattern
    # would otherwise fail with a NoMethodError on nil below
    unless @features[key]
      raise "Please define pattern in configuration file: #{key}"
    end

    # array of pairs [filename(string), matches(hash)]
    filenames = []

    Dir.foreach(dir) do |filename|
      # does the filename match the pattern of the feature "key"?
      matches = @features[key].match(filename, var_hash)
      next unless matches

      $stderr.puts "got " + filename if @test_print

      # do the variable values for this filename conform
      # to the variable values given in var_hash?
      mismatched = var_hash.keys.select { |var| matches[var] != var_hash[var] }

      if mismatched.empty?
        filenames << [filename, matches]
      elsif @test_print
        mismatched.each do |var|
          $stderr.puts "Mismatch for #{var}: #{matches[var]} vs. #{var_hash[var]}"
        end
      end
    end

    filenames
  end

  #####
  # set list feature access:
  #
  # For a given list type feature, sets the method that get_lf() uses
  # for accessing the feature.
  #
  # feature_name: string, name of the feature
  # proc:         proc, access method for the list feature
  #
  # Signature of the access method: the first parameter is the array
  # stored for the feature, holding one array rhs.split() for each
  # config file entry "feature = rhs". The other parameters are not
  # checked by ConfigData, there may be arbitrarily many.
  #
  # Raises a RuntimeError if the feature is not of type list.
  def set_list_feature_access(feature_name, proc)
    unless @feature_types[feature_name] == 'list'
      raise "Cannot set list feature access to non-list feature #{feature_name}"
    end

    @list_feature_access[feature_name] = proc
  end

  #####
  # get_lf
  #
  # Accesses a list type feature for which an access function
  # has been set using set_list_feature_access.
  #
  # feature_name: string, name of list feature
  # parameters:   further parameters, handed on to the access function
  #
  # returns: whatever the access function returns
  #
  # Raises a RuntimeError if no access function has been set.
  def get_lf(feature_name, *parameters)
    accessor = @list_feature_access[feature_name]
    unless accessor
      raise "I have no list feature access method for #{feature_name}."
    end

    # the list of values for the feature goes in as first parameter
    accessor.call(@features[feature_name], *parameters)
  end


  protected

  #####
  # extract_def
  #
  # Given a line of the config file, it is assumed that it has the structure
  #    [white space] string [white space] = [white space] stuff
  # 'stuff' may include further white space, 'string' may not.
  #
  # line: string, line from config file
  #
  # returns: a pair of strings, the left-hand side and the right-hand side
  #   of the =, minus the [white space] in the places shown above
  #
  # Exits with status 1 if the line does not have this structure.
  def extract_def(line)
    parsed = /^\s*(\w+)\s*=\s*([^\s].*)$/.match(line)

    unless parsed
      $stderr.puts "Error in experiment file: "
      $stderr.puts "I couldn't analyze the following line: "
      $stderr.puts line
      exit 1
    end

    [parsed[1], parsed[2]]
  end

  ####
  # get_contents
  #
  # Access to the object variables, used by adjoin().
  #
  # returns: triple [features, feature_types, list_feature_access];
  #   these are the internal hashes themselves, not copies
  def get_contents()
    [@features, @feature_types, @list_feature_access]
  end

end
|
472
|
+
|
473
|
+
|
474
|
+
##############################
|
475
|
+
# ConfigFormatElement is an auxiliary class
|
476
|
+
# of ConfigData.
|
477
|
+
# It keeps track of feature patterns with variables in them
|
478
|
+
# that can be instantiated.
|
479
|
+
|
480
|
+
class ConfigFormatElement

  # new()
  #
  # Given a pattern and a list of variable names,
  # analyzes the pattern and remembers the variable names.
  #
  # string:    string, the pattern; variables are enclosed in <>
  # variables: list of variable names that can occur
  #
  # Raises a RuntimeError for a '<' inside a variable, a '>' outside
  # a variable, an unclosed '<', or a variable name not in 'variables'.
  def initialize(string, variables)
    @variables = variables

    # @pattern: what 'string' is split into,
    # an array of elements that are either fixed parts or variables.
    # fixed part: pair [item:string, "string"]
    # variable: pair [variable_name:string, "variable"]
    @pattern = []

    # inside_var: are we currently between '<' and '>'?
    inside_var = false
    # item: the fixed part or variable name collected so far
    item = "".dup

    string.each_char do |char|
      if inside_var
        case char
        when "<"
          raise "Duplicate < in " + string
        when ">"
          # variable name complete
          raise "Unknown variable " + item unless @variables.include? item
          @pattern << [item, "variable"]
          item = "".dup
          inside_var = false
        else
          item << char
        end
      else
        case char
        when "<"
          # a variable starts: record the fixed part before it, if any
          unless item.empty?
            @pattern << [item, "string"]
            item = "".dup
          end
          inside_var = true
        when ">"
          raise "Unexpected > in " + string
        else
          item << char
        end
      end
    end

    # having read through the whole of "string",
    # we must not be inside a variable anymore
    raise "Unclosed < in " + string if inside_var

    # last bit still to be recorded?
    @pattern << [item, "string"] unless item.empty?

    # regexp for matching this pattern without any fillers
    @regexp = make_regexp(@pattern)
  end

  # instantiate()
  #
  # Given pairs of variable names and variable values, instantiates
  # @pattern to a string in which variable names are replaced
  # by their values.
  #
  # var_hash: hash, variable name(string) => variable value(string)
  #
  # returns: string
  #
  # Raises a RuntimeError if a variable of the pattern has no value in var_hash.
  def instantiate(var_hash)
    pieces = @pattern.map do |item, string_or_var|
      case string_or_var
      when "string"
        item
      when "variable"
        raise "Missing variable instantiation: " + item if var_hash[item].nil?
        var_hash[item]
      else
        raise "Shouldn't be here"
      end
    end

    pieces.join
  end

  # match()
  #
  # Given a string, tries to match the whole of it against @pattern
  # while setting the variables given in 'fillers' to
  # the values given in that hash.
  #
  # string:  a string
  # fillers: hash, variable name(string) => value, or nil
  #
  # returns: if the string matches, a hash variable name => value
  #   that includes the fillers given as a parameter as well as
  #   values for all other variables mentioned in @pattern,
  #   or false if no match.
  def match(string, fillers = nil)
    # have we been given partial info about variables?
    regexp = fillers ? make_regexp(@pattern, fillers) : @regexp

    found = regexp.match(string)
    return false if found.nil?

    # retv: variable name(string) => value
    retv = {}
    retv.update(fillers) if fillers

    # The regexp has one group for each variable occurrence that has
    # no filler, in pattern order; index walks through these groups.
    index = 1
    @pattern.each do |item, string_or_var|
      next unless string_or_var == "variable"
      next unless fillers.nil? || fillers[item].nil?

      if found[index].nil?
        raise "Match, but not enough matched elements? Strange."
      end

      if retv[item].nil?
        retv[item] = found[index]
      elsif retv[item] != found[index]
        # the same variable occurs twice with different values
        return false
      end

      index += 1
    end

    retv
  end

  # used_variables
  #
  # returns: an array of the variable names used in @pattern,
  #   one per occurrence
  def used_variables()
    @pattern.select { |_item, string_or_var|
      string_or_var == "variable"
    }.map { |item, _string_or_var| item }
  end

  ####################
  private

  # make_regexp:
  # Makes a regular expression from a pattern
  # together with some variable fillers.
  #
  # pattern: array of pairs [string, "string"] or [string, "variable"]
  # fillers: hash, variable name(string) => value, or nil
  #
  # Fixed parts and filler values are matched literally; each variable
  # without a filler becomes a group "(.+)". The expression is anchored
  # with \A and \z so that it has to cover the whole string, not just
  # one line of it.
  #
  # returns: Regexp object
  def make_regexp(pattern, fillers = nil)
    body = pattern.map do |item, string_or_var|
      case string_or_var
      when "variable"
        # nil? is the same test that match() uses for counting groups
        if fillers.nil? || fillers[item].nil?
          "(.+)"
        else
          # to_s: instantiate() accepts non-string values, so accept them here too
          Regexp.escape(fillers[item].to_s)
        end
      when "string"
        Regexp.escape(item)
      else
        raise "Shouldn't be here"
      end
    end

    Regexp.new("\\A" + body.join + "\\z")
  end

end
|
694
|
+
|