frprep 0.0.1.prealpha

Sign up to get free protection for your applications and to get access to all the features.
Files changed (138) hide show
  1. data/.yardopts +8 -0
  2. data/CHANGELOG.rdoc +0 -0
  3. data/LICENSE.rdoc +0 -0
  4. data/README.rdoc +0 -0
  5. data/lib/common/AbstractSynInterface.rb +1227 -0
  6. data/lib/common/BerkeleyInterface.rb +375 -0
  7. data/lib/common/CollinsInterface.rb +1165 -0
  8. data/lib/common/ConfigData.rb +694 -0
  9. data/lib/common/Counter.rb +18 -0
  10. data/lib/common/DBInterface.rb +48 -0
  11. data/lib/common/EnduserMode.rb +27 -0
  12. data/lib/common/Eval.rb +480 -0
  13. data/lib/common/FixSynSemMapping.rb +196 -0
  14. data/lib/common/FrPrepConfigData.rb +66 -0
  15. data/lib/common/FrprepHelper.rb +1324 -0
  16. data/lib/common/Graph.rb +345 -0
  17. data/lib/common/ISO-8859-1.rb +24 -0
  18. data/lib/common/ML.rb +186 -0
  19. data/lib/common/Maxent.rb +215 -0
  20. data/lib/common/MiniparInterface.rb +1388 -0
  21. data/lib/common/Optimise.rb +195 -0
  22. data/lib/common/Parser.rb +213 -0
  23. data/lib/common/RegXML.rb +269 -0
  24. data/lib/common/RosyConventions.rb +171 -0
  25. data/lib/common/SQLQuery.rb +243 -0
  26. data/lib/common/STXmlTerminalOrder.rb +194 -0
  27. data/lib/common/SalsaTigerRegXML.rb +2347 -0
  28. data/lib/common/SalsaTigerXMLHelper.rb +99 -0
  29. data/lib/common/SleepyInterface.rb +384 -0
  30. data/lib/common/SynInterfaces.rb +275 -0
  31. data/lib/common/TabFormat.rb +720 -0
  32. data/lib/common/Tiger.rb +1448 -0
  33. data/lib/common/TntInterface.rb +44 -0
  34. data/lib/common/Tree.rb +61 -0
  35. data/lib/common/TreetaggerInterface.rb +303 -0
  36. data/lib/common/headz.rb +338 -0
  37. data/lib/common/option_parser.rb +13 -0
  38. data/lib/common/ruby_class_extensions.rb +310 -0
  39. data/lib/fred/Baseline.rb +150 -0
  40. data/lib/fred/FileZipped.rb +31 -0
  41. data/lib/fred/FredBOWContext.rb +863 -0
  42. data/lib/fred/FredConfigData.rb +182 -0
  43. data/lib/fred/FredConventions.rb +232 -0
  44. data/lib/fred/FredDetermineTargets.rb +324 -0
  45. data/lib/fred/FredEval.rb +312 -0
  46. data/lib/fred/FredFeatureExtractors.rb +321 -0
  47. data/lib/fred/FredFeatures.rb +1061 -0
  48. data/lib/fred/FredFeaturize.rb +596 -0
  49. data/lib/fred/FredNumTrainingSenses.rb +27 -0
  50. data/lib/fred/FredParameters.rb +402 -0
  51. data/lib/fred/FredSplit.rb +84 -0
  52. data/lib/fred/FredSplitPkg.rb +180 -0
  53. data/lib/fred/FredTest.rb +607 -0
  54. data/lib/fred/FredTrain.rb +144 -0
  55. data/lib/fred/PlotAndREval.rb +480 -0
  56. data/lib/fred/fred.rb +45 -0
  57. data/lib/fred/md5.rb +23 -0
  58. data/lib/fred/opt_parser.rb +250 -0
  59. data/lib/frprep/AbstractSynInterface.rb +1227 -0
  60. data/lib/frprep/Ampersand.rb +37 -0
  61. data/lib/frprep/BerkeleyInterface.rb +375 -0
  62. data/lib/frprep/CollinsInterface.rb +1165 -0
  63. data/lib/frprep/ConfigData.rb +694 -0
  64. data/lib/frprep/Counter.rb +18 -0
  65. data/lib/frprep/FNCorpusXML.rb +643 -0
  66. data/lib/frprep/FNDatabase.rb +144 -0
  67. data/lib/frprep/FixSynSemMapping.rb +196 -0
  68. data/lib/frprep/FrPrepConfigData.rb +66 -0
  69. data/lib/frprep/FrameXML.rb +513 -0
  70. data/lib/frprep/FrprepHelper.rb +1324 -0
  71. data/lib/frprep/Graph.rb +345 -0
  72. data/lib/frprep/ISO-8859-1.rb +24 -0
  73. data/lib/frprep/MiniparInterface.rb +1388 -0
  74. data/lib/frprep/Parser.rb +213 -0
  75. data/lib/frprep/RegXML.rb +269 -0
  76. data/lib/frprep/STXmlTerminalOrder.rb +194 -0
  77. data/lib/frprep/SalsaTigerRegXML.rb +2347 -0
  78. data/lib/frprep/SalsaTigerXMLHelper.rb +99 -0
  79. data/lib/frprep/SleepyInterface.rb +384 -0
  80. data/lib/frprep/SynInterfaces.rb +275 -0
  81. data/lib/frprep/TabFormat.rb +720 -0
  82. data/lib/frprep/Tiger.rb +1448 -0
  83. data/lib/frprep/TntInterface.rb +44 -0
  84. data/lib/frprep/Tree.rb +61 -0
  85. data/lib/frprep/TreetaggerInterface.rb +303 -0
  86. data/lib/frprep/do_parses.rb +142 -0
  87. data/lib/frprep/frprep.rb +686 -0
  88. data/lib/frprep/headz.rb +338 -0
  89. data/lib/frprep/one_parsed_file.rb +28 -0
  90. data/lib/frprep/opt_parser.rb +94 -0
  91. data/lib/frprep/ruby_class_extensions.rb +310 -0
  92. data/lib/rosy/AbstractFeatureAndExternal.rb +240 -0
  93. data/lib/rosy/DBMySQL.rb +146 -0
  94. data/lib/rosy/DBSQLite.rb +280 -0
  95. data/lib/rosy/DBTable.rb +239 -0
  96. data/lib/rosy/DBWrapper.rb +176 -0
  97. data/lib/rosy/ExternalConfigData.rb +58 -0
  98. data/lib/rosy/FailedParses.rb +130 -0
  99. data/lib/rosy/FeatureInfo.rb +242 -0
  100. data/lib/rosy/GfInduce.rb +1115 -0
  101. data/lib/rosy/GfInduceFeature.rb +148 -0
  102. data/lib/rosy/InputData.rb +294 -0
  103. data/lib/rosy/RosyConfigData.rb +115 -0
  104. data/lib/rosy/RosyConfusability.rb +338 -0
  105. data/lib/rosy/RosyEval.rb +465 -0
  106. data/lib/rosy/RosyFeatureExtractors.rb +1609 -0
  107. data/lib/rosy/RosyFeaturize.rb +280 -0
  108. data/lib/rosy/RosyInspect.rb +336 -0
  109. data/lib/rosy/RosyIterator.rb +477 -0
  110. data/lib/rosy/RosyPhase2FeatureExtractors.rb +230 -0
  111. data/lib/rosy/RosyPruning.rb +165 -0
  112. data/lib/rosy/RosyServices.rb +744 -0
  113. data/lib/rosy/RosySplit.rb +232 -0
  114. data/lib/rosy/RosyTask.rb +19 -0
  115. data/lib/rosy/RosyTest.rb +826 -0
  116. data/lib/rosy/RosyTrain.rb +232 -0
  117. data/lib/rosy/RosyTrainingTestTable.rb +786 -0
  118. data/lib/rosy/TargetsMostFrequentFrame.rb +60 -0
  119. data/lib/rosy/View.rb +418 -0
  120. data/lib/rosy/opt_parser.rb +379 -0
  121. data/lib/rosy/rosy.rb +77 -0
  122. data/lib/shalmaneser/version.rb +3 -0
  123. data/test/frprep/test_opt_parser.rb +94 -0
  124. data/test/functional/functional_test_helper.rb +40 -0
  125. data/test/functional/sample_experiment_files/fred_test.salsa.erb +122 -0
  126. data/test/functional/sample_experiment_files/fred_train.salsa.erb +135 -0
  127. data/test/functional/sample_experiment_files/prp_test.salsa.erb +138 -0
  128. data/test/functional/sample_experiment_files/prp_test.salsa.fred.standalone.erb +120 -0
  129. data/test/functional/sample_experiment_files/prp_test.salsa.rosy.standalone.erb +120 -0
  130. data/test/functional/sample_experiment_files/prp_train.salsa.erb +138 -0
  131. data/test/functional/sample_experiment_files/prp_train.salsa.fred.standalone.erb +138 -0
  132. data/test/functional/sample_experiment_files/prp_train.salsa.rosy.standalone.erb +138 -0
  133. data/test/functional/sample_experiment_files/rosy_test.salsa.erb +257 -0
  134. data/test/functional/sample_experiment_files/rosy_train.salsa.erb +259 -0
  135. data/test/functional/test_fred.rb +47 -0
  136. data/test/functional/test_frprep.rb +52 -0
  137. data/test/functional/test_rosy.rb +20 -0
  138. metadata +270 -0
@@ -0,0 +1,694 @@
1
+ # class ConfigData:
2
+ #
3
+ # reads config data file,
4
+ # matches it against feature declarations given in its new() method,
5
+ # offers access methods for different kinds of features
6
+ #
7
+ # In the config file, all feature specifications have the form
8
+ #
9
+ # feature_name = feature_value
10
+ #
11
+ # where feature_name is a string without spaces. feature_value
12
+ # may include spaces, depending on the feature type (see below).
13
+ #
14
+ # To include a comment in a config file, start the comment line with
15
+ # '#'.
16
+ #
17
+ # Features are typed. The following types are supported:
18
+ #
19
+ # - normal types:
20
+ # "bool", "float", "integer", "string"
21
+ # For the get() function with which features in the ConfigData object
22
+ # are accessed, the values are transformed from the strings in the
23
+ # config file to the appropriate class: Boolean, Float, Integer, String
24
+ #
25
+ # - other types:
26
+ # pattern: This is a feature that may include variables in
27
+ # <> brackets. When this feature is accessed,
28
+ # values for these variables are given, i.e. this
29
+ # pattern has to be instantiated.
30
+ # For example, given a feature
31
+ #
32
+ # fileformat = features.<type>.train
33
+ #
34
+ # and method call
35
+ # instantiate("fileformat", "type" => "path")
36
+ #
37
+ # what is returned is a string "features.path.train"
38
+ #
39
+ # Variables used in a pattern have to be declared to
40
+ # the new() method.
41
+ #
42
+ # list: This is the only feature type where more than one
43
+ # feature specification with the same feature_name is allowed.
44
+ # The right-hand sides of a list feature are stored in an array.
45
+ #
46
+ # Given a 'list' feature 'bla', if the config file contains
47
+ #
48
+ # bla = blupp 1 2
49
+ # bla = la di da
50
+ #
51
+ # the list feature 'bla' is represented as follows:
52
+ # @features['bla'] = [['blupp', '1', '2'], ['la', 'di', 'da']]
53
+ #
54
+ # For comfortable access to a list feature, arbitrary
55
+ # access functions for list features can be defined.
56
+ #
57
+ #
58
+
59
+ require 'common/ruby_class_extensions'
60
+
61
+
62
+ #####################################################
63
+ ####################################################
64
+ # ConfigData is the main class in this package.
65
+ # It manages config files.
66
+ #
67
+ # To use it, inherit from it and just make a new new() method
68
+ # that only takes as input the name of the config file
69
+ # and that declares all the feature types and variable names
70
+ # needed for the given application.
71
+
72
class ConfigData

  ###########
  # new()
  #
  # Reads the config file line by line and stores every feature
  # definition found in it. Lines starting with '#' and blank lines
  # are skipped.
  #
  # filename:      string, name of the config file
  # feature_types: hash, feature_name => feature_type; the types handled
  #                by set_entry are "bool", "float", "integer", "string",
  #                "pattern" and "list"
  # variables:     array of strings, the variables that may occur in
  #                pattern type features
  #
  # Prints a message to stderr and exits with status 1 if the file
  # cannot be opened, if a line cannot be analyzed, or if a line sets
  # a feature that has not been declared in feature_types.
  def initialize(filename, feature_types, variables)
    @test_print = false
    @variables = variables
    @original_filename = filename

    file = open_config_file(filename)
    begin
      # feature_types: hash: feature_name => feature_type
      # features: hash: feature_name => value
      @feature_types = feature_types
      @features = {}

      # @list_feature_access: hash feature_name => Proc,
      # access methods for list features
      @list_feature_access = {}

      # pre-initialize list features to an empty array
      @feature_types.each_pair { |feature_name, feature_type|
        @features[feature_name] = [] if feature_type == "list"
      }

      # examine the config file contents
      file.each_line { |raw_line|
        line = raw_line.chomp.strip
        # skip blank lines and comments
        next if line.empty? || line.start_with?("#")

        feature_name, rhs = extract_def(line)
        set_entry(feature_name, rhs)
      }
    ensure
      # always release the file handle, also when a line is rejected
      file.close
    end
  end

  #####
  # set_entry
  #
  # Sets an entry, either an existing or a new one. The feature must
  # have been declared in the feature types given to new(); otherwise
  # an error is printed and the program exits with status 1.
  #
  # feature_name: string, name of the feature
  # rhs:          string, the right-hand side as written in the config file
  #
  # Conversion by type:
  # - pattern: wrapped in a ConfigFormatElement
  # - list:    rhs.split() is appended to the feature's array unless an
  #            equal entry is already there; an empty rhs is ignored
  #            with a warning
  # - bool:    must be 'true' or 'false', otherwise exit 1
  # - float:   rhs.to_f  (no validation is performed)
  # - integer: rhs.to_i  (no validation is performed)
  # - string:  stored as is
  #
  # raises: RuntimeError if the declared type is none of the above
  def set_entry(feature_name, rhs)
    unless @feature_types[feature_name]
      $stderr.puts "Error in experiment file:"
      $stderr.puts "Unknown parameter #{feature_name} in #{@original_filename}."
      $stderr.puts "Expected features for this type of experiment file:"
      $stderr.puts @feature_types.keys().join(", ")
      exit 1
    end

    case @feature_types[feature_name]
    when "pattern"
      # file format specification
      @features[feature_name] = ConfigFormatElement.new(rhs, @variables)

    when "list"
      if rhs.empty?
        $stderr.puts "WARNING: I got an empty value for list feature #{feature_name}."
        $stderr.puts "I'll ignore it."
      else
        words = rhs.split
        entries = (@features[feature_name] ||= [])
        entries << words unless entries.include?(words)
      end

    when "bool"
      unless ["true", "false"].include? rhs
        $stderr.puts "Error in experiment file:"
        $stderr.puts "Value for #{feature_name} must be either 'true' or 'false'."
        $stderr.puts "I got: "+ rhs.to_s
        exit 1
      end
      @features[feature_name] = (rhs == "true")

    when "float"
      @features[feature_name] = rhs.to_f

    when "integer"
      @features[feature_name] = rhs.to_i

    when "string"
      @features[feature_name] = rhs

    else
      raise "Unknown feature type for feature #{feature_name}: #{@feature_types[feature_name]}"
    end
  end

  ####
  # unset_list_entry
  #
  # Removes entries from a list feature.
  #
  # lhs: string, name of the list feature
  # rhs: string or regexp
  #      - string: each entry whose words, joined by single spaces,
  #                exactly equal the string is removed
  #      - regexp: each entry whose joined words match the regexp is removed
  #
  # Prints an error and exits with status 1 if lhs is not a list feature.
  #
  # returns: array of the removed entries
  # raises: RuntimeError if rhs is neither a String nor a Regexp
  def unset_list_entry(lhs, rhs)
    unless @feature_types[lhs] == "list"
      $stderr.puts "Error in experiment file: "
      $stderr.puts "Feature #{lhs} unknown or not of type list."
      exit 1
    end

    rhs_match =
      case rhs
      when String
        Regexp.new("\\A" + Regexp.escape(rhs) + "\\z")
      when Regexp
        rhs
      else
        raise "Shouldn't be here: " + rhs.class.to_s
      end

    removed, kept = @features[lhs].partition { |entry| entry.join(" ") =~ rhs_match }
    # modify the stored array in place so that other holders of the
    # same array object see the removal as well
    @features[lhs].replace(kept)
    removed
  end

  #####
  # adjoin
  #
  # Adds the information from a second ConfigData object to this one.
  # Features whose names are already declared here are ignored,
  # i.e. the entries of this object win.
  #
  # config_obj: ConfigData object
  #
  # raises: RuntimeError if config_obj is not a ConfigData object
  def adjoin(config_obj)
    unless config_obj.kind_of? ConfigData
      raise "I can only adjoin another ConfigData object"
    end

    other_features, other_feature_types, other_list_feature_access = config_obj.get_contents

    # names declared on both sides: keep mine, drop the other object's.
    # The overlap is computed once, before any of my hashes is updated.
    overlap = @feature_types.keys & other_feature_types.keys
    is_new = lambda { |name| !overlap.include?(name) }

    @features.update(other_features.reject { |name, _value| !is_new.call(name) })
    @list_feature_access.update(other_list_feature_access.reject { |name, _proc| !is_new.call(name) })
    @feature_types.update(other_feature_types.reject { |name, _type| !is_new.call(name) })
  end

  #####
  # get()
  #
  # name: string, name of the feature to access
  #
  # returns: the value of the feature; its class depends on the feature
  #          type. Returns nil if the feature has not been set.
  # raises: RuntimeError if no feature of this name has been declared
  def get(name)
    if @feature_types[name].nil?
      raise "Unknown feature #{name}"
    end

    # may be nil if the feature has not been set
    @features[name]
  end

  ####
  # get_type
  #
  # returns: the declared type of the given feature,
  #          or nil if it is undeclared
  def get_type(feature_name)
    @feature_types[feature_name]
  end

  #####
  # is_defined
  #
  # feature: string, name of the feature
  #
  # returns: true if the stored value for this feature is neither nil
  #          nor false, false otherwise.
  #
  # NOTE(review): because this is a truthiness test, a "bool" feature
  # set to 'false' is reported as not defined, and a "list" feature is
  # always reported as defined (it is pre-initialized to an empty
  # array). Kept as is because callers may rely on it.
  def is_defined(feature)
    @features[feature] ? true : false
  end

  #####
  # instantiate
  #
  # Given a pattern type feature and a hash mapping all variables
  # occurring in the pattern to values, instantiate the pattern.
  #
  # key:      string, feature name
  # var_hash: hash, variable name(string) => value(string)
  #
  # returns: string, the pattern with all variables replaced by their values
  # raises: RuntimeError if key is not a pattern feature or has not been set
  def instantiate(key, var_hash={})
    unless @feature_types[key] == "pattern"
      raise "Nothing known about pattern " + key
    end
    unless @features[key]
      raise "Please define pattern in configuration file: " + key
    end

    @features[key].instantiate(var_hash)
  end

  #####
  # get_filename:
  #
  # synonym for instantiate()
  def get_filename(key, var_hash={})
    instantiate(key, var_hash)
  end

  #####
  # set_test_print
  #
  # Switches the test output of get_all_filenames on (true) or off (false).
  #
  # raises: RuntimeError if tf is neither true nor false
  def set_test_print(tf)
    unless [true, false].include? tf
      raise "Shouldn't be here"
    end
    @test_print = tf
  end

  #####
  # get_all_filenames
  #
  # Given a directory, a pattern type feature, and a hash mapping some
  # of the pattern's variables to values, return all filenames in the
  # directory that match the partially instantiated pattern.
  #
  # dir:      string, directory name
  # key:      string, name of pattern type feature
  # var_hash: hash, variable name(string) => value(string)
  #
  # returns: an array of pairs [filename(string), matches(hash)]
  #          where the matches hash maps the variables of the pattern to
  #          their values as instantiated in the given filename.
  #          The filename doesn't include the directory.
  # raises: RuntimeError if key is not a pattern feature or has not been set
  def get_all_filenames(dir, key, var_hash={})
    unless @feature_types[key] == "pattern"
      raise "Nothing known about file format " + key
    end

    # same check as in instantiate(): a declared but unset pattern
    # cannot be matched against anything
    pattern = @features[key]
    unless pattern
      raise "Please define pattern in configuration file: " + key
    end

    # array of pairs [filename(string), matches(hash)]
    filenames = []

    Dir.foreach(dir) { |filename|
      # does the filename match the pattern of the feature "key"?
      matches = pattern.match(filename, var_hash)
      next unless matches

      $stderr.puts "got " + filename if @test_print

      # do the variable values for this filename conform
      # to the variable values given in var_hash?
      mismatched = var_hash.keys.reject { |var| matches[var] == var_hash[var] }
      if mismatched.empty?
        filenames << [filename, matches]
      elsif @test_print
        mismatched.each { |var|
          $stderr.puts "Mismatch for " + var + ": " +
                       matches[var].to_s + " vs. " + var_hash[var].to_s
        }
      end
    }

    filenames
  end

  #####
  # set_list_feature_access
  #
  # For a given list type feature, set the Proc that get_lf() uses
  # for accessing the feature.
  #
  # feature_name: string, name of the feature
  # proc:         Proc; it is called with the feature's array of
  #               entries (one rhs.split() per config file line) as first
  #               argument, followed by whatever is passed to get_lf()
  #
  # raises: RuntimeError if the feature is not a list feature
  def set_list_feature_access(feature_name, proc)
    unless @feature_types[feature_name] == 'list'
      raise "Cannot set list feature access to non-list feature #{feature_name}"
    end

    @list_feature_access[feature_name] = proc
  end

  #####
  # get_lf
  #
  # Access a list type feature through the access function
  # registered with set_list_feature_access.
  #
  # feature_name: string, name of list feature
  # parameters:   further arguments handed on to the access function
  #
  # returns: whatever the access function returns
  # raises: RuntimeError if no access function has been registered
  def get_lf(feature_name, *parameters)
    unless @list_feature_access[feature_name]
      raise "I have no list feature access method for #{feature_name}."
    end

    # the list of values for the feature goes in as first argument
    @list_feature_access[feature_name].call(@features[feature_name], *parameters)
  end


  protected

  #####
  # extract_def
  #
  # Given a line of the config file, it is assumed that it has
  # the structure
  #   [white space] string [white space] = [white space] stuff
  # 'stuff' may include further white space, 'string' may not.
  #
  # Prints an error and exits with status 1 if the line does not
  # have this structure.
  #
  # returns: a pair of strings, the left-hand side and the right-hand side
  #          of the =, minus the [white space] in the places shown above
  def extract_def(line)
    parsed = /^\s*(\w+)\s*=\s*([^\s].*)$/.match(line)
    unless parsed
      $stderr.puts "Error in experiment file: "
      $stderr.puts "I couldn't analyze the following line: "
      $stderr.puts line
      exit 1
    end
    [parsed[1], parsed[2]]
  end

  ####
  # get_contents
  #
  # returns: the triple [features, feature_types, list_feature_access]
  #          of this object's internal hashes (not copies)
  def get_contents()
    [@features, @feature_types, @list_feature_access]
  end


  private

  # Opens the config file for reading.
  # Prints an error and exits with status 1 if that fails.
  #
  # returns: the open File object; the caller has to close it
  def open_config_file(filename)
    File.new(filename)
  rescue StandardError
    $stderr.puts "Error: I could not open the experiment file #{filename}"
    exit 1
  end

end
472
+
473
+
474
+ ##############################
475
+ # ConfigFormatElement is an auxiliary class
476
+ # of ConfigData.
477
+ # It keeps track of feature patterns with variables in them
478
+ # that can be instantiated.
479
+
480
class ConfigFormatElement

  # new()
  #
  # Given a pattern and a list of variable names, analyze the pattern
  # and remember the variable names.
  #
  # string:    string, the pattern; variables are written as <name>
  # variables: list of variable names that can occur
  #
  # raises: RuntimeError for a '<' inside a variable, a '>' outside
  #         a variable, an unclosed '<', or a variable name that is
  #         not in 'variables'
  def initialize(string, variables)
    @variables = variables

    # pattern: this is what the 'string' is split into,
    # an array of elements that are either fixed parts or variables.
    # fixed part: pair [item:string, "string"]
    # variable:   pair [variable_name:string, "variable"]
    @pattern = []

    inside_variable = false
    # "".dup: a fresh, unfrozen buffer that is safe to append to
    item = "".dup

    string.each_char { |char|
      if inside_variable
        case char
        when "<"
          raise "Duplicate < in " + string
        when ">"
          unless @variables.include? item
            raise "Unknown variable " + item
          end
          @pattern << [item, "variable"]
          item = "".dup
          inside_variable = false
        else
          item << char
        end
      else
        case char
        when "<"
          # a variable starts: record the fixed part collected so far
          @pattern << [item, "string"] unless item.empty?
          item = "".dup
          inside_variable = true
        when ">"
          raise "Unexpected > in " + string
        else
          item << char
        end
      end
    }

    # the whole string has been read: we must be outside a variable
    if inside_variable
      raise "Unclosed < in " + string
    end

    # last fixed part still to be recorded?
    @pattern << [item, "string"] unless item.empty?

    # regexp for matching this pattern without any fillers
    @regexp = make_regexp(@pattern)
  end

  # instantiate
  #
  # Given pairs of variable names and variable values, instantiate
  # @pattern to a string in which variable names are replaced by
  # their values.
  #
  # var_hash: hash variable name(string) => variable value(string)
  #
  # returns: string
  # raises: RuntimeError if var_hash has no value for a variable of the pattern
  def instantiate(var_hash)
    @pattern.map { |item, string_or_var|
      case string_or_var
      when "string"
        item
      when "variable"
        if var_hash[item].nil?
          raise "Missing variable instantiation: " + item
        end
        var_hash[item]
      else
        raise "Shouldn't be here"
      end
    }.join
  end

  # match()
  #
  # Given a string, try to match the whole of it against @pattern
  # while setting the variables given in 'fillers' to the values
  # given in that hash.
  #
  # string:  a string
  # fillers: hash variable name(string) => value(string), or nil
  #
  # returns: if the string matches, a hash variable name => value
  #          that includes the fillers given as a parameter as well as
  #          values for all other variables mentioned in @pattern,
  #          or false if no match. A variable occurring several times
  #          in the pattern has to take the same value each time.
  def match(string, fillers = nil)
    # have we been given partial info about variables?
    regexp = fillers ? make_regexp(@pattern, fillers) : @regexp
    match_data = regexp.match(string)
    return false if match_data.nil?

    # retv: variable name(string) => value(string)
    retv = {}
    if fillers
      fillers.each_pair { |name, val| retv[name] = val }
    end

    # every variable of the pattern that has not been filled by
    # 'fillers' corresponds, in order, to one group of the regexp
    index = 1
    @pattern.each { |item, string_or_var|
      next unless string_or_var == "variable"
      next unless fillers.nil? or fillers[item].nil?

      if match_data[index].nil?
        raise "Match, but not enough matched elements? Strange."
      end

      if retv[item].nil?
        retv[item] = match_data[index]
      elsif retv[item] != match_data[index]
        # same variable, two different values
        return false
      end

      index += 1
    }

    retv
  end

  # used_variables
  #
  # returns: an array of variable names used in @pattern,
  #          in order of occurrence
  def used_variables()
    @pattern.select { |_item, string_or_var|
      string_or_var == "variable"
    }.map { |item, _string_or_var| item }
  end

  ####################
  private

  # make_regexp:
  # make regular expression from a pattern
  # together with some variable fillers
  #
  # pattern: array of pairs [string, "string"] or [string, "variable"]
  # fillers: hash variable name(string) => value(string), or nil
  #
  # Variables with a filler are replaced by the escaped filler,
  # all other variables become a group "(.+)".
  # The expression is anchored with \A and \z so that it has to cover
  # the whole string, not just one line of it.
  #
  # returns: Regexp object
  def make_regexp(pattern, fillers = nil)
    source = pattern.map { |item, string_or_var|
      case string_or_var
      when "variable"
        if fillers and fillers[item]
          Regexp.escape(fillers[item])
        else
          "(.+)"
        end
      when "string"
        Regexp.escape(item)
      else
        raise "Shouldn't be here"
      end
    }.join

    Regexp.new("\\A" + source + "\\z")
  end

end
694
+