frprep 0.0.1.prealpha

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (138) hide show
  1. data/.yardopts +8 -0
  2. data/CHANGELOG.rdoc +0 -0
  3. data/LICENSE.rdoc +0 -0
  4. data/README.rdoc +0 -0
  5. data/lib/common/AbstractSynInterface.rb +1227 -0
  6. data/lib/common/BerkeleyInterface.rb +375 -0
  7. data/lib/common/CollinsInterface.rb +1165 -0
  8. data/lib/common/ConfigData.rb +694 -0
  9. data/lib/common/Counter.rb +18 -0
  10. data/lib/common/DBInterface.rb +48 -0
  11. data/lib/common/EnduserMode.rb +27 -0
  12. data/lib/common/Eval.rb +480 -0
  13. data/lib/common/FixSynSemMapping.rb +196 -0
  14. data/lib/common/FrPrepConfigData.rb +66 -0
  15. data/lib/common/FrprepHelper.rb +1324 -0
  16. data/lib/common/Graph.rb +345 -0
  17. data/lib/common/ISO-8859-1.rb +24 -0
  18. data/lib/common/ML.rb +186 -0
  19. data/lib/common/Maxent.rb +215 -0
  20. data/lib/common/MiniparInterface.rb +1388 -0
  21. data/lib/common/Optimise.rb +195 -0
  22. data/lib/common/Parser.rb +213 -0
  23. data/lib/common/RegXML.rb +269 -0
  24. data/lib/common/RosyConventions.rb +171 -0
  25. data/lib/common/SQLQuery.rb +243 -0
  26. data/lib/common/STXmlTerminalOrder.rb +194 -0
  27. data/lib/common/SalsaTigerRegXML.rb +2347 -0
  28. data/lib/common/SalsaTigerXMLHelper.rb +99 -0
  29. data/lib/common/SleepyInterface.rb +384 -0
  30. data/lib/common/SynInterfaces.rb +275 -0
  31. data/lib/common/TabFormat.rb +720 -0
  32. data/lib/common/Tiger.rb +1448 -0
  33. data/lib/common/TntInterface.rb +44 -0
  34. data/lib/common/Tree.rb +61 -0
  35. data/lib/common/TreetaggerInterface.rb +303 -0
  36. data/lib/common/headz.rb +338 -0
  37. data/lib/common/option_parser.rb +13 -0
  38. data/lib/common/ruby_class_extensions.rb +310 -0
  39. data/lib/fred/Baseline.rb +150 -0
  40. data/lib/fred/FileZipped.rb +31 -0
  41. data/lib/fred/FredBOWContext.rb +863 -0
  42. data/lib/fred/FredConfigData.rb +182 -0
  43. data/lib/fred/FredConventions.rb +232 -0
  44. data/lib/fred/FredDetermineTargets.rb +324 -0
  45. data/lib/fred/FredEval.rb +312 -0
  46. data/lib/fred/FredFeatureExtractors.rb +321 -0
  47. data/lib/fred/FredFeatures.rb +1061 -0
  48. data/lib/fred/FredFeaturize.rb +596 -0
  49. data/lib/fred/FredNumTrainingSenses.rb +27 -0
  50. data/lib/fred/FredParameters.rb +402 -0
  51. data/lib/fred/FredSplit.rb +84 -0
  52. data/lib/fred/FredSplitPkg.rb +180 -0
  53. data/lib/fred/FredTest.rb +607 -0
  54. data/lib/fred/FredTrain.rb +144 -0
  55. data/lib/fred/PlotAndREval.rb +480 -0
  56. data/lib/fred/fred.rb +45 -0
  57. data/lib/fred/md5.rb +23 -0
  58. data/lib/fred/opt_parser.rb +250 -0
  59. data/lib/frprep/AbstractSynInterface.rb +1227 -0
  60. data/lib/frprep/Ampersand.rb +37 -0
  61. data/lib/frprep/BerkeleyInterface.rb +375 -0
  62. data/lib/frprep/CollinsInterface.rb +1165 -0
  63. data/lib/frprep/ConfigData.rb +694 -0
  64. data/lib/frprep/Counter.rb +18 -0
  65. data/lib/frprep/FNCorpusXML.rb +643 -0
  66. data/lib/frprep/FNDatabase.rb +144 -0
  67. data/lib/frprep/FixSynSemMapping.rb +196 -0
  68. data/lib/frprep/FrPrepConfigData.rb +66 -0
  69. data/lib/frprep/FrameXML.rb +513 -0
  70. data/lib/frprep/FrprepHelper.rb +1324 -0
  71. data/lib/frprep/Graph.rb +345 -0
  72. data/lib/frprep/ISO-8859-1.rb +24 -0
  73. data/lib/frprep/MiniparInterface.rb +1388 -0
  74. data/lib/frprep/Parser.rb +213 -0
  75. data/lib/frprep/RegXML.rb +269 -0
  76. data/lib/frprep/STXmlTerminalOrder.rb +194 -0
  77. data/lib/frprep/SalsaTigerRegXML.rb +2347 -0
  78. data/lib/frprep/SalsaTigerXMLHelper.rb +99 -0
  79. data/lib/frprep/SleepyInterface.rb +384 -0
  80. data/lib/frprep/SynInterfaces.rb +275 -0
  81. data/lib/frprep/TabFormat.rb +720 -0
  82. data/lib/frprep/Tiger.rb +1448 -0
  83. data/lib/frprep/TntInterface.rb +44 -0
  84. data/lib/frprep/Tree.rb +61 -0
  85. data/lib/frprep/TreetaggerInterface.rb +303 -0
  86. data/lib/frprep/do_parses.rb +142 -0
  87. data/lib/frprep/frprep.rb +686 -0
  88. data/lib/frprep/headz.rb +338 -0
  89. data/lib/frprep/one_parsed_file.rb +28 -0
  90. data/lib/frprep/opt_parser.rb +94 -0
  91. data/lib/frprep/ruby_class_extensions.rb +310 -0
  92. data/lib/rosy/AbstractFeatureAndExternal.rb +240 -0
  93. data/lib/rosy/DBMySQL.rb +146 -0
  94. data/lib/rosy/DBSQLite.rb +280 -0
  95. data/lib/rosy/DBTable.rb +239 -0
  96. data/lib/rosy/DBWrapper.rb +176 -0
  97. data/lib/rosy/ExternalConfigData.rb +58 -0
  98. data/lib/rosy/FailedParses.rb +130 -0
  99. data/lib/rosy/FeatureInfo.rb +242 -0
  100. data/lib/rosy/GfInduce.rb +1115 -0
  101. data/lib/rosy/GfInduceFeature.rb +148 -0
  102. data/lib/rosy/InputData.rb +294 -0
  103. data/lib/rosy/RosyConfigData.rb +115 -0
  104. data/lib/rosy/RosyConfusability.rb +338 -0
  105. data/lib/rosy/RosyEval.rb +465 -0
  106. data/lib/rosy/RosyFeatureExtractors.rb +1609 -0
  107. data/lib/rosy/RosyFeaturize.rb +280 -0
  108. data/lib/rosy/RosyInspect.rb +336 -0
  109. data/lib/rosy/RosyIterator.rb +477 -0
  110. data/lib/rosy/RosyPhase2FeatureExtractors.rb +230 -0
  111. data/lib/rosy/RosyPruning.rb +165 -0
  112. data/lib/rosy/RosyServices.rb +744 -0
  113. data/lib/rosy/RosySplit.rb +232 -0
  114. data/lib/rosy/RosyTask.rb +19 -0
  115. data/lib/rosy/RosyTest.rb +826 -0
  116. data/lib/rosy/RosyTrain.rb +232 -0
  117. data/lib/rosy/RosyTrainingTestTable.rb +786 -0
  118. data/lib/rosy/TargetsMostFrequentFrame.rb +60 -0
  119. data/lib/rosy/View.rb +418 -0
  120. data/lib/rosy/opt_parser.rb +379 -0
  121. data/lib/rosy/rosy.rb +77 -0
  122. data/lib/shalmaneser/version.rb +3 -0
  123. data/test/frprep/test_opt_parser.rb +94 -0
  124. data/test/functional/functional_test_helper.rb +40 -0
  125. data/test/functional/sample_experiment_files/fred_test.salsa.erb +122 -0
  126. data/test/functional/sample_experiment_files/fred_train.salsa.erb +135 -0
  127. data/test/functional/sample_experiment_files/prp_test.salsa.erb +138 -0
  128. data/test/functional/sample_experiment_files/prp_test.salsa.fred.standalone.erb +120 -0
  129. data/test/functional/sample_experiment_files/prp_test.salsa.rosy.standalone.erb +120 -0
  130. data/test/functional/sample_experiment_files/prp_train.salsa.erb +138 -0
  131. data/test/functional/sample_experiment_files/prp_train.salsa.fred.standalone.erb +138 -0
  132. data/test/functional/sample_experiment_files/prp_train.salsa.rosy.standalone.erb +138 -0
  133. data/test/functional/sample_experiment_files/rosy_test.salsa.erb +257 -0
  134. data/test/functional/sample_experiment_files/rosy_train.salsa.erb +259 -0
  135. data/test/functional/test_fred.rb +47 -0
  136. data/test/functional/test_frprep.rb +52 -0
  137. data/test/functional/test_rosy.rb +20 -0
  138. metadata +270 -0
@@ -0,0 +1,694 @@
1
+ # class ConfigData:
2
+ #
3
+ # reads config data file,
4
+ # matches it against feature declarations given in its new() method,
5
+ # offers access methods for different kinds of features
6
+ #
7
+ # In the config file, all feature specifications have the form
8
+ #
9
+ # feature_name = feature_value
10
+ #
11
+ # where feature_name is a string without spaces. feature_value
12
+ # may include spaces, depending on the feature type (see below).
13
+ #
14
+ # To include a comment in a config file, start the comment line with
15
+ # '#'.
16
+ #
17
+ # Features are typed. The following types are supported:
18
+ #
19
+ # - normal types:
20
+ # "bool", "float", "integer", "string"
21
+ # For the get() function with which features in the ConfigData object
22
+ # are accessed, the values are transformed from the strings in the
23
+ # config file to the appropriate class: Boolean, Float, Integer, String
24
+ #
25
+ # - other types:
26
+ # pattern: This is a feature that may include variables in
27
+ # <> brackets. When this feature is accessed,
28
+ # values for these variables are given, i.e. this
29
+ # pattern has to be instantiated.
30
+ # For example, given a feature
31
+ #
32
+ # fileformat = features.<type>.train
33
+ #
34
+ # and method call
35
+ # instantiate("fileformat", "type" => "path")
36
+ #
37
+ # what is returned is a string "features.path.train"
38
+ #
39
+ # Variables used in a pattern have to be declared to
40
+ # the new() method.
41
+ #
42
+ # list: This is the only feature type where more than one
43
+ # feature specification with the same feature_name is allowed.
44
+ # The right-hand sides of a list feature are stored in an array.
45
+ #
46
+ # Given a 'list' feature 'bla', if the config file contains
47
+ #
48
+ # bla = blupp 1 2
49
+ # bla = la di da
50
+ #
51
+ # the list feature 'bla' is represented as follows:
52
+ # @features['bla'] = [['blupp', 1,2], ['la', 'di', 'da']]
53
+ #
54
+ # For comfortable access to a list feature, arbitrary
55
+ # access functions for list features can be defined.
56
+ #
57
+ #
58
+
59
+ require 'frprep/ruby_class_extensions'
60
+
61
+
62
+ #####################################################
63
+ ####################################################
64
+ # ConfigData is the main class in this package.
65
+ # It manages config files.
66
+ #
67
+ # To use it, inherit from it and just make a new new() method
68
+ # that only takes as input the name of the config file
69
+ # and that declares all the feature types and variable names
70
+ # needed for the given application.
71
+
72
class ConfigData

  ###########
  # new()
  #
  # Reads the config file and stores every feature specification found in it.
  #
  # Input parameters: the name of the config file, a hash declaring all
  # features by mapping feature names to their types,
  # and an array of all variables that may occur in pattern type features.
  #
  # If the file cannot be opened, or contains a line that cannot be parsed
  # or an undeclared feature, an error is printed to stderr and the
  # process exits with status 1.
  def initialize(filename,      # string: name of config file
                 feature_types, # hash: feature_name => feature_type
                 variables)     # array of strings: variables usable in pattern features

    @test_print = false
    @variables = variables
    @original_filename = filename

    ##
    # open config file
    begin
      file = File.new(filename)
    rescue
      $stderr.puts "Error: I could not open the experiment file #{filename}"
      exit 1
    end

    # @feature_types: hash: feature_name => feature_type
    # @features:      hash: feature_name => value
    @feature_types = feature_types
    @features = {}

    # @list_feature_access: hash feature_name => Proc,
    # the access method registered for a list feature
    @list_feature_access = {}

    # pre-initialize list features to an empty array
    @feature_types.each_pair do |feature_name, feature_type|
      @features[feature_name] = [] if feature_type == "list"
    end

    ##
    # examine the config file contents;
    # the ensure makes sure the file handle is released even when
    # a malformed line makes set_entry/extract_def call exit
    begin
      file.each_line do |raw_line|
        line = raw_line.chomp.strip

        # skip blank lines and comment lines
        next if line.empty? || line.start_with?("#")

        feature_name, rhs = extract_def(line)
        set_entry(feature_name, rhs)
      end
    ensure
      file.close
    end
  end

  #####
  # set_entry
  #
  # set an entry in the experiment file, either an existing or a new one,
  # but it must conform to the feature types declared in the new() method.
  #
  # Prints an error and exits with status 1 for an undeclared feature name
  # or a bool value other than 'true'/'false'.
  # Raises a RuntimeError if the declared type itself is unknown.
  def set_entry(feature_name, rhs)

    feature_type = @feature_types[feature_name]

    unless feature_type
      $stderr.puts "Error in experiment file:"
      $stderr.puts "Unknown parameter #{feature_name} in #{@original_filename}."
      $stderr.puts "Expected features for this type of experiment file:"
      $stderr.puts @feature_types.keys.join(", ")
      exit 1
    end

    case feature_type
    when "pattern"
      # file format specification, possibly containing <variables>
      @features[feature_name] = ConfigFormatElement.new(rhs, @variables)

    when "list"
      # rhs is a string of space-separated words;
      # it is stored as an array of words, and duplicates are dropped
      if rhs.empty?
        $stderr.puts "WARNING: I got an empty value for list feature #{feature_name}."
        $stderr.puts "I'll ignore it."
      else
        words = rhs.split
        @features[feature_name] << words unless @features[feature_name].include?(words)
      end

    when "bool"
      # boolean value: only the literal strings 'true' and 'false' are accepted
      unless ["true", "false"].include? rhs
        $stderr.puts "Error in experiment file:"
        $stderr.puts "Value for #{feature_name} must be either 'true' or 'false'."
        $stderr.puts "I got: " + rhs.to_s
        exit 1
      end
      @features[feature_name] = (rhs == "true")

    when "float"
      # float value (to_f: unparseable input silently becomes 0.0)
      @features[feature_name] = rhs.to_f

    when "integer"
      # integer value (to_i: unparseable input silently becomes 0)
      @features[feature_name] = rhs.to_i

    when "string"
      # string value, stored as is
      @features[feature_name] = rhs

    else
      raise "Unknown feature type for feature #{feature_name}: #{feature_type}"
    end
  end

  ####
  # remove list entry in this config data structure:
  # the lhs argument is the list feature name
  # the rhs argument can be a string or a regexp.
  # - string: each entry exactly matching the string is removed
  # - regexp: each entry matching the regexp is removed
  # Entries are compared in their space-joined form.
  #
  # returns: array of the entries that matched (and were removed)
  def unset_list_entry(lhs, # string: feature name
                       rhs) # string/regexp: righthand side
    unless @feature_types[lhs] == "list"
      $stderr.puts "Error in experiment file: "
      $stderr.puts "Feature #{lhs} unknown or not of type list."
      exit 1
    end

    rhs_match =
      case rhs
      when String
        # \A and \z anchor the whole string, giving a true exact match
        Regexp.new("\\A" + Regexp.escape(rhs) + "\\z")
      when Regexp
        rhs
      else
        raise "Shouldn't be here: " + rhs.class.to_s
      end

    removed, kept = @features[lhs].partition { |entry| entry.join(" ") =~ rhs_match }
    # replace() keeps the same array object, as the original in-place delete did
    @features[lhs].replace(kept)
    removed
  end


  #####
  # adjoin
  #
  # adds the information from a second ConfigData object
  # to this one.
  # Feature names that this object already declares are kept as they are;
  # the other object's entries for them are ignored.
  #
  # Raises a RuntimeError if the argument is not a ConfigData object.
  def adjoin(config_obj) # ConfigData object

    ##
    # sanity check:
    # the other object must be a ConfigData object
    unless config_obj.kind_of? ConfigData
      raise "I can only adjoin another ConfigData object"
    end

    other_features, other_feature_types, other_list_feature_access = config_obj.get_contents

    # if feature name sets are not disjoint,
    # work on filtered copies so the other object is left untouched
    overlap = @feature_types.keys & other_feature_types.keys
    unless overlap.empty?
      other_features = other_features.reject { |name, _value| overlap.include?(name) }
      other_feature_types = other_feature_types.reject { |name, _type| overlap.include?(name) }
      other_list_feature_access = other_list_feature_access.reject { |name, _proc| overlap.include?(name) }
    end

    # now adjoin the contents of the other config object to mine
    @features.update(other_features)
    @feature_types.update(other_feature_types)
    @list_feature_access.update(other_list_feature_access)
  end

  #####
  # get()
  #
  # returns the value of a given feature
  # raises an error if no feature of this name
  # has been declared to the new() method
  #
  # returns: a feature value. the type of the return value
  # depends on the type of the feature.
  # returns nil if the feature has not been set in the config file.
  def get(name) # string: name of the feature to access
    raise "Unknown feature #{name}" if @feature_types[name].nil?

    # may be nil if the feature has not been set
    @features[name]
  end

  ####
  # get_type
  #
  # returns the type of a given feature,
  # or nil if it is undefined
  def get_type(feature_name)
    @feature_types[feature_name]
  end

  #####
  # is_defined
  #
  # returns: true if the stored value for this feature is neither nil nor
  # false, false else.
  #
  # NOTE(review): because the test is on truthiness, a bool feature set to
  # 'false' is reported as not defined, and a list feature (pre-initialized
  # to an empty array) is always reported as defined. Kept as is because
  # callers may rely on it.
  def is_defined(feature) # string: name of the feature
    @features[feature] ? true : false
  end

  #####
  # instantiate
  #
  # given a pattern type feature, and a hash
  # mapping all variables occurring in the pattern to
  # values, instantiate the pattern
  #
  # returns: string, the pattern with all variables
  # instantiated with their values
  #
  # Raises a RuntimeError if the feature is not of type pattern
  # or has not been set.
  def instantiate(key,         # string: feature name
                  var_hash={}) # hash: variable name(string) => value(string)

    unless @feature_types[key] == "pattern"
      raise "Nothing known about pattern #{key}"
    end
    unless @features[key]
      raise "Please define pattern in configuration file: #{key}"
    end

    @features[key].instantiate(var_hash)
  end

  #####
  # get_filename:
  #
  # synonym for instantiate()
  def get_filename(key, var_hash={})
    instantiate(key, var_hash)
  end

  #####
  # set_test_print
  #
  # set test output to on (true) or off (false)
  def set_test_print(tf) # boolean
    raise "Shouldn't be here" unless [true, false].include? tf

    @test_print = tf
  end


  #####
  # get_all_filenames
  #
  # given a directory, a pattern type feature,
  # and a hash mapping some of the pattern's variables
  # to values, return all filenames in the given directory
  # that match the partially instantiated pattern
  #
  # returns: an array of pairs [filename(string), matches(hash)]
  # where the matches hash maps all variables of the pattern to
  # their values as instantiated in the given filename
  # The filename doesn't include the directory.
  #
  # Raises a RuntimeError if the feature is not of type pattern
  # or has not been set.
  def get_all_filenames(dir,         # string: directory name
                        key,         # string: name of pattern type feature
                        var_hash={}) # hash: variable name(string) => value(string)

    unless @feature_types[key] == "pattern"
      raise "Nothing known about file format #{key}"
    end

    # same guard as instantiate(): without it an unset pattern would
    # surface as a NoMethodError on nil inside the directory loop
    pattern = @features[key]
    unless pattern
      raise "Please define pattern in configuration file: #{key}"
    end

    # array of pairs [filename(string), matches(hash)]
    filenames = []

    Dir.foreach(dir) do |filename|
      # does the filename match the pattern of the feature "key"?
      matches = pattern.match(filename, var_hash)
      next unless matches

      $stderr.puts "got " + filename if @test_print

      # do the variable values for this filename conform
      # to the variable values given in var_hash?
      mismatched = var_hash.keys.reject { |var| matches[var] == var_hash[var] }

      if mismatched.empty?
        filenames << [filename, matches]
      elsif @test_print
        mismatched.each do |var|
          $stderr.puts "Mismatch for #{var}: #{matches[var]} vs. #{var_hash[var]}"
        end
      end
    end

    filenames
  end

  #####
  # set list feature access:
  #
  # for a given list type feature, set a method that should
  # be used for accessing the feature.
  #
  # method signature: first parameter is an array of tuples of strings.
  # for each experiment file entry
  #   feature = rhs
  # there will be a tuple rhs.split() in the list.
  #
  # The other parameters are not checked by ConfigData, there
  # may be arbitrarily many
  def set_list_feature_access(feature_name, # string: name of the feature
                              proc)         # proc: access method for list feature
    unless @feature_types[feature_name] == 'list'
      raise "Cannot set list feature access to non-list feature #{feature_name}"
    end

    @list_feature_access[feature_name] = proc
  end

  #####
  # get_lf
  #
  # access a list type feature for which an access function
  # has been set using set_list_feature_access
  #
  # returns: whatever the access function returns
  def get_lf(feature_name, # string: name of list feature
             *parameters)  # parameters for access function, collapsed into an array here

    accessor = @list_feature_access[feature_name]
    unless accessor
      raise "I have no list feature access method for #{feature_name}."
    end

    # call access function with the list of values for the feature
    # as first parameter, followed by the re-exploded parameters
    accessor.call(@features[feature_name], *parameters)
  end


  protected

  #####
  # extract_def
  #
  # given a line of the config file,
  # it is assumed that it has the structure
  #   [white space] string [white space] = [white space] stuff
  # 'stuff' may include further white space, 'string' may not.
  #
  # returns: a pair of strings, the left-hand side and the right-hand side
  # of the =, minus the [white space] in the places shown above.
  # Prints an error and exits with status 1 if the line does not have
  # this structure (this includes an empty right-hand side).
  def extract_def(line) # string: line from config file
    parsed = /^\s*(\w+)\s*=\s*([^\s].*)$/.match(line)

    unless parsed
      $stderr.puts "Error in experiment file: "
      $stderr.puts "I couldn't analyze the following line: "
      $stderr.puts line
      exit 1
    end

    [parsed[1], parsed[2]]
  end

  ####
  # access to the object variables, used by adjoin() on another instance
  #
  # returns: triple [features, feature_types, list_feature_access]
  # (the live hashes, not copies)
  def get_contents()
    [@features, @feature_types, @list_feature_access]
  end

end
472
+
473
+
474
+ ##############################
475
+ # ConfigFormatElement is an auxiliary class
476
+ # of ConfigData.
477
+ # It keeps track of feature patterns with variables in them
478
+ # that can be instantiated.
479
+
480
class ConfigFormatElement

  # new()
  #
  # given a pattern and a list of variable names,
  # analyze the pattern and remember the variable names
  #
  # Raises a RuntimeError for a nested '<', a '>' without an opening '<',
  # an unclosed '<', or a variable name not listed in 'variables'.
  def initialize(string,    # string: pattern, may include names of variables
                            # enclosed in <>
                 variables) # list of variable names that can occur

    @variables = variables

    # @pattern: this is what 'string' is split into,
    # an array of elements that are either fixed parts or variables.
    # fixed part: pair [item:string, "string"]
    # variable:   pair [variable_name:string, "variable"]
    @pattern = []

    inside_variable = false
    item = "".dup

    # analyze string,
    # split into variables and fixed parts
    string.each_char do |char|
      if inside_variable
        case char
        when "<"
          raise "Duplicate < in " + string
        when ">"
          raise "Unknown variable " + item unless @variables.include? item

          @pattern << [item, "variable"]
          item = "".dup
          inside_variable = false
        else
          item << char
        end
      else
        case char
        when "<"
          # a variable starts: record the fixed part collected so far
          unless item.empty?
            @pattern << [item, "string"]
            item = "".dup
          end
          inside_variable = true
        when ">"
          raise "Unexpected > in " + string
        else
          item << char
        end
      end
    end

    # having read through the whole of 'string',
    # we must not be inside a variable any more
    raise "Unclosed < in " + string if inside_variable

    # last fixed part still to be recorded?
    @pattern << [item, "string"] unless item.empty?

    # regexp for matching this pattern without any fillers
    @regexp = make_regexp(@pattern)
  end

  # instantiate: given pairs of variable names and variable values,
  # instantiate @pattern to a string in which var names are replaced
  # by their values
  #
  # returns: string
  # Raises a RuntimeError if var_hash has no value for a variable
  # used in the pattern.
  def instantiate(var_hash) # hash variable name(string) => variable value(string)
    pieces = @pattern.map do |item, string_or_var|
      case string_or_var
      when "string"
        item
      when "variable"
        raise "Missing variable instantiation: " + item if var_hash[item].nil?

        var_hash[item]
      else
        raise "Shouldn't be here"
      end
    end

    pieces.join
  end

  # match()
  #
  # given a string, try to match the whole of it against the @pattern
  # while setting the variables given in 'fillers' to
  # the values given in that hash.
  #
  # returns: if the string matches, a hash variable name => value
  # that includes the fillers given as a parameter as well as
  # values for all other variables mentioned in @pattern,
  # or false if no match.
  # A variable occurring several times in the pattern must have the
  # same value at each occurrence, otherwise the result is false.
  def match(string,        # a string
            fillers = nil) # hash variable name(string) => value(string)

    # with partial info about variables, a dedicated regexp is needed
    regexp = fillers ? make_regexp(@pattern, fillers) : @regexp
    match_data = regexp.match(string)

    # no match via the regular expression
    return false if match_data.nil?

    # retv: variable name(string) => value(string),
    # starting out with the given fillers
    retv = {}
    fillers.each_pair { |name, val| retv[name] = val } if fillers

    # walk through the variables that produced a capture group, in
    # pattern order; 'index' is the number of the matching group
    index = 1
    @pattern.each do |item, string_or_var|
      next unless string_or_var == "variable"
      next if prefilled?(fillers, item)

      if match_data[index].nil?
        raise "Match, but not enough matched elements? Strange."
      end

      if retv[item].nil?
        retv[item] = match_data[index]
      elsif retv[item] != match_data[index]
        # repeated variable with conflicting values
        return false
      end

      index += 1
    end

    retv
  end

  # used_variables
  #
  # returns: an array of variable names used in @pattern
  def used_variables()
    variable_entries = @pattern.select { |_item, string_or_var| string_or_var == "variable" }
    variable_entries.map { |item, _string_or_var| item }
  end

  ####################
  private

  # prefilled?
  #
  # returns: true if 'fillers' supplies a value for variable 'name'.
  # Shared by make_regexp and match so that both agree on which
  # variables produce a capture group.
  def prefilled?(fillers, name)
    fillers && fillers[name] ? true : false
  end

  # make_regexp:
  # make regular expression from a pattern
  # together with some variable fillers.
  # Fixed parts and filler values are escaped; every unfilled variable
  # becomes a capture group "(.+)".
  # The expression is anchored with \A and \z so that it has to cover
  # the whole string (^ and $ would only anchor at line boundaries).
  #
  # returns: Regexp object
  def make_regexp(pattern,       # array of pairs [string, "string"] or [string, "variable"]
                  fillers = nil) # hash variable name(string) => value(string)
    body = pattern.map do |item, string_or_var|
      case string_or_var
      when "variable"
        prefilled?(fillers, item) ? Regexp.escape(fillers[item]) : "(.+)"
      when "string"
        Regexp.escape(item)
      else
        raise "Shouldn't be here"
      end
    end

    Regexp.new("\\A" + body.join + "\\z")
  end

end
694
+