frprep 0.0.1.prealpha

Sign up to get free protection for your applications and to get access to all the features.
Files changed (138) hide show
  1. data/.yardopts +8 -0
  2. data/CHANGELOG.rdoc +0 -0
  3. data/LICENSE.rdoc +0 -0
  4. data/README.rdoc +0 -0
  5. data/lib/common/AbstractSynInterface.rb +1227 -0
  6. data/lib/common/BerkeleyInterface.rb +375 -0
  7. data/lib/common/CollinsInterface.rb +1165 -0
  8. data/lib/common/ConfigData.rb +694 -0
  9. data/lib/common/Counter.rb +18 -0
  10. data/lib/common/DBInterface.rb +48 -0
  11. data/lib/common/EnduserMode.rb +27 -0
  12. data/lib/common/Eval.rb +480 -0
  13. data/lib/common/FixSynSemMapping.rb +196 -0
  14. data/lib/common/FrPrepConfigData.rb +66 -0
  15. data/lib/common/FrprepHelper.rb +1324 -0
  16. data/lib/common/Graph.rb +345 -0
  17. data/lib/common/ISO-8859-1.rb +24 -0
  18. data/lib/common/ML.rb +186 -0
  19. data/lib/common/Maxent.rb +215 -0
  20. data/lib/common/MiniparInterface.rb +1388 -0
  21. data/lib/common/Optimise.rb +195 -0
  22. data/lib/common/Parser.rb +213 -0
  23. data/lib/common/RegXML.rb +269 -0
  24. data/lib/common/RosyConventions.rb +171 -0
  25. data/lib/common/SQLQuery.rb +243 -0
  26. data/lib/common/STXmlTerminalOrder.rb +194 -0
  27. data/lib/common/SalsaTigerRegXML.rb +2347 -0
  28. data/lib/common/SalsaTigerXMLHelper.rb +99 -0
  29. data/lib/common/SleepyInterface.rb +384 -0
  30. data/lib/common/SynInterfaces.rb +275 -0
  31. data/lib/common/TabFormat.rb +720 -0
  32. data/lib/common/Tiger.rb +1448 -0
  33. data/lib/common/TntInterface.rb +44 -0
  34. data/lib/common/Tree.rb +61 -0
  35. data/lib/common/TreetaggerInterface.rb +303 -0
  36. data/lib/common/headz.rb +338 -0
  37. data/lib/common/option_parser.rb +13 -0
  38. data/lib/common/ruby_class_extensions.rb +310 -0
  39. data/lib/fred/Baseline.rb +150 -0
  40. data/lib/fred/FileZipped.rb +31 -0
  41. data/lib/fred/FredBOWContext.rb +863 -0
  42. data/lib/fred/FredConfigData.rb +182 -0
  43. data/lib/fred/FredConventions.rb +232 -0
  44. data/lib/fred/FredDetermineTargets.rb +324 -0
  45. data/lib/fred/FredEval.rb +312 -0
  46. data/lib/fred/FredFeatureExtractors.rb +321 -0
  47. data/lib/fred/FredFeatures.rb +1061 -0
  48. data/lib/fred/FredFeaturize.rb +596 -0
  49. data/lib/fred/FredNumTrainingSenses.rb +27 -0
  50. data/lib/fred/FredParameters.rb +402 -0
  51. data/lib/fred/FredSplit.rb +84 -0
  52. data/lib/fred/FredSplitPkg.rb +180 -0
  53. data/lib/fred/FredTest.rb +607 -0
  54. data/lib/fred/FredTrain.rb +144 -0
  55. data/lib/fred/PlotAndREval.rb +480 -0
  56. data/lib/fred/fred.rb +45 -0
  57. data/lib/fred/md5.rb +23 -0
  58. data/lib/fred/opt_parser.rb +250 -0
  59. data/lib/frprep/AbstractSynInterface.rb +1227 -0
  60. data/lib/frprep/Ampersand.rb +37 -0
  61. data/lib/frprep/BerkeleyInterface.rb +375 -0
  62. data/lib/frprep/CollinsInterface.rb +1165 -0
  63. data/lib/frprep/ConfigData.rb +694 -0
  64. data/lib/frprep/Counter.rb +18 -0
  65. data/lib/frprep/FNCorpusXML.rb +643 -0
  66. data/lib/frprep/FNDatabase.rb +144 -0
  67. data/lib/frprep/FixSynSemMapping.rb +196 -0
  68. data/lib/frprep/FrPrepConfigData.rb +66 -0
  69. data/lib/frprep/FrameXML.rb +513 -0
  70. data/lib/frprep/FrprepHelper.rb +1324 -0
  71. data/lib/frprep/Graph.rb +345 -0
  72. data/lib/frprep/ISO-8859-1.rb +24 -0
  73. data/lib/frprep/MiniparInterface.rb +1388 -0
  74. data/lib/frprep/Parser.rb +213 -0
  75. data/lib/frprep/RegXML.rb +269 -0
  76. data/lib/frprep/STXmlTerminalOrder.rb +194 -0
  77. data/lib/frprep/SalsaTigerRegXML.rb +2347 -0
  78. data/lib/frprep/SalsaTigerXMLHelper.rb +99 -0
  79. data/lib/frprep/SleepyInterface.rb +384 -0
  80. data/lib/frprep/SynInterfaces.rb +275 -0
  81. data/lib/frprep/TabFormat.rb +720 -0
  82. data/lib/frprep/Tiger.rb +1448 -0
  83. data/lib/frprep/TntInterface.rb +44 -0
  84. data/lib/frprep/Tree.rb +61 -0
  85. data/lib/frprep/TreetaggerInterface.rb +303 -0
  86. data/lib/frprep/do_parses.rb +142 -0
  87. data/lib/frprep/frprep.rb +686 -0
  88. data/lib/frprep/headz.rb +338 -0
  89. data/lib/frprep/one_parsed_file.rb +28 -0
  90. data/lib/frprep/opt_parser.rb +94 -0
  91. data/lib/frprep/ruby_class_extensions.rb +310 -0
  92. data/lib/rosy/AbstractFeatureAndExternal.rb +240 -0
  93. data/lib/rosy/DBMySQL.rb +146 -0
  94. data/lib/rosy/DBSQLite.rb +280 -0
  95. data/lib/rosy/DBTable.rb +239 -0
  96. data/lib/rosy/DBWrapper.rb +176 -0
  97. data/lib/rosy/ExternalConfigData.rb +58 -0
  98. data/lib/rosy/FailedParses.rb +130 -0
  99. data/lib/rosy/FeatureInfo.rb +242 -0
  100. data/lib/rosy/GfInduce.rb +1115 -0
  101. data/lib/rosy/GfInduceFeature.rb +148 -0
  102. data/lib/rosy/InputData.rb +294 -0
  103. data/lib/rosy/RosyConfigData.rb +115 -0
  104. data/lib/rosy/RosyConfusability.rb +338 -0
  105. data/lib/rosy/RosyEval.rb +465 -0
  106. data/lib/rosy/RosyFeatureExtractors.rb +1609 -0
  107. data/lib/rosy/RosyFeaturize.rb +280 -0
  108. data/lib/rosy/RosyInspect.rb +336 -0
  109. data/lib/rosy/RosyIterator.rb +477 -0
  110. data/lib/rosy/RosyPhase2FeatureExtractors.rb +230 -0
  111. data/lib/rosy/RosyPruning.rb +165 -0
  112. data/lib/rosy/RosyServices.rb +744 -0
  113. data/lib/rosy/RosySplit.rb +232 -0
  114. data/lib/rosy/RosyTask.rb +19 -0
  115. data/lib/rosy/RosyTest.rb +826 -0
  116. data/lib/rosy/RosyTrain.rb +232 -0
  117. data/lib/rosy/RosyTrainingTestTable.rb +786 -0
  118. data/lib/rosy/TargetsMostFrequentFrame.rb +60 -0
  119. data/lib/rosy/View.rb +418 -0
  120. data/lib/rosy/opt_parser.rb +379 -0
  121. data/lib/rosy/rosy.rb +77 -0
  122. data/lib/shalmaneser/version.rb +3 -0
  123. data/test/frprep/test_opt_parser.rb +94 -0
  124. data/test/functional/functional_test_helper.rb +40 -0
  125. data/test/functional/sample_experiment_files/fred_test.salsa.erb +122 -0
  126. data/test/functional/sample_experiment_files/fred_train.salsa.erb +135 -0
  127. data/test/functional/sample_experiment_files/prp_test.salsa.erb +138 -0
  128. data/test/functional/sample_experiment_files/prp_test.salsa.fred.standalone.erb +120 -0
  129. data/test/functional/sample_experiment_files/prp_test.salsa.rosy.standalone.erb +120 -0
  130. data/test/functional/sample_experiment_files/prp_train.salsa.erb +138 -0
  131. data/test/functional/sample_experiment_files/prp_train.salsa.fred.standalone.erb +138 -0
  132. data/test/functional/sample_experiment_files/prp_train.salsa.rosy.standalone.erb +138 -0
  133. data/test/functional/sample_experiment_files/rosy_test.salsa.erb +257 -0
  134. data/test/functional/sample_experiment_files/rosy_train.salsa.erb +259 -0
  135. data/test/functional/test_fred.rb +47 -0
  136. data/test/functional/test_frprep.rb +52 -0
  137. data/test/functional/test_rosy.rb +20 -0
  138. metadata +270 -0
@@ -0,0 +1,148 @@
1
+ # GfInduceFeature
2
+ # Katrin Erk Jan 06
3
+ #
4
+ # use result of GfInduce.rb as
5
+ # feature for Rosy
6
+
7
+ require "rosy/GfInduce"
8
+ require "rosy/AbstractFeatureAndExternal"
9
+ require "common/ruby_class_extensions"
10
+
11
+ ###
12
+ # make filename for GfInduce pickle file
13
+ def filename_gfmap(exp, # ExternalConfigData object
14
+ interpreter) # SynInterpreter class
15
+
16
+ # output dir as given in my experiment file
17
+ # If there is an experiment ID, make subdirectory
18
+ # named after the experiment ID and place the data there.
19
+ output_dir = File.new_dir(exp.get("directory"))
20
+ if exp.get("experiment_id")
21
+ output_dir = File.new_dir(output_dir, exp.get("experiment_id"))
22
+ end
23
+
24
+ # output file name:
25
+ # Gfmap.{<service>=<system_name>.}*{OPT<service>=<system_name>.}*pkl
26
+ return output_dir +
27
+ "Gfmap." +
28
+ interpreter.systems().to_a.map { |service, system_name|
29
+ service.to_s+ "=" + system_name.to_s
30
+ }.sort.join(".") + "." +
31
+ interpreter.optional_systems().to_a.map { |service, system_name|
32
+ "OPT" + service.to_s + "=" + system_name.to_s
33
+ }.sort.join(".") + ".pkl"
34
+ end
35
+
36
+ ################################
37
+ # base class for all following feature extractors
38
+ class GfInduceFeatureExtractor < ExternalFeatureExtractor
39
+ GfInduceFeatureExtractor.announce_me()
40
+
41
+ @@okay = true # external experiment file present?
42
+ @@gf_obj = nil # GfInduce object
43
+ @@node_to_gf = nil # Hash: SynNodes of a sentence -> Gf label
44
+
45
+ def GfInduceFeatureExtractor.designator()
46
+ return "gf_fn"
47
+ end
48
+ def GfInduceFeatureExtractor.feature_names()
49
+ return ["gf_fn"]
50
+ end
51
+ def GfInduceFeatureExtractor.sql_type()
52
+ return "VARCHAR(25)"
53
+ end
54
+ def GfInduceFeatureExtractor.feature_type()
55
+ return "syn"
56
+ end
57
+ def GfInduceFeatureExtractor.phase()
58
+ return "phase 1"
59
+ end
60
+
61
+ ###
62
+ # set sentence, set node, set other settings:
63
+ # this is done prior to
64
+ # feature computation using compute_features()
65
+ # such that computations that stay the same for
66
+ # several features can be done in advance
67
+ #
68
+ # This is just relevant for Phase 1
69
+ #
70
+ # returns: false/nil if there was a problem
71
+ def GfInduceFeatureExtractor.set_sentence(sent, # SalsaTigerSentence object
72
+ frame) # FrameNode object
73
+
74
+ super(sent, frame)
75
+
76
+ if @@okay
77
+ # we can actually compute something
78
+
79
+ # let the GF object compute all subcat frames
80
+ # for the target of this frame
81
+ subcatframes_of_current_target = @@gf_obj.apply(frame.target.children())
82
+
83
+ # keep the most frequent one of the
84
+ # subcat frames returned by the GF object:
85
+ if subcatframes_of_current_target.empty?
86
+ # no subcat frames returned
87
+ subcatframe = []
88
+ else
89
+ # we have at least one subcat frame:
90
+ # keep the most frequent one of them
91
+ #
92
+ # Also, subcatframes_of_current_target
93
+ # contains triples [frame, actual_subcatframe, frequency]
94
+ # Of these, keep just the actual_subcatframe
95
+
96
+ subcatframe = subcatframes_of_current_target.sort { |a, b|
97
+ # sort by frequency
98
+ b.last <=> a.last
99
+ }.first[1]
100
+ end
101
+
102
+ # change into a mapping node(SynNode) -> GF(string)
103
+ @@node_to_gf = Hash.new
104
+ subcatframe.each { |gf, prep, fe, synnodes|
105
+ synnodes.each { |node|
106
+ @@node_to_gf[node] = "#{gf} #{prep}"
107
+ }
108
+ }
109
+ end
110
+ end
111
+
112
+
113
+ ###
114
+ # Initialize: read GfInduce pickle
115
+ def initialize(exp, # experiment file object
116
+ interpreter_class) # SynInterpreter class
117
+
118
+ super(exp, interpreter_class)
119
+
120
+ if @exp_external
121
+ pickle_filename = filename_gfmap(@exp_external, @@interpreter_class)
122
+ @@gf_obj = GfInduce.from_file(pickle_filename)
123
+ @@okay = true
124
+
125
+ else
126
+ # signal that you cannot compute anything
127
+ @@okay = false
128
+ end
129
+ end
130
+
131
+ ###
132
+ # compute: compute features
133
+ #
134
+ # returns an array of features (strings), length the same as the
135
+ # length of feature_names()
136
+ #
137
+ # here: array of length one, content either a string or nil
138
+ def compute_features()
139
+ # current node: @@node
140
+ # check whether the current node has been assigned a slot
141
+ # in the subcat frame
142
+ if @@okay
143
+ return [ @@node_to_gf[@@node] ]
144
+ else
145
+ return [ nil ]
146
+ end
147
+ end
148
+ end
@@ -0,0 +1,294 @@
1
+ ###########
2
+ #
3
+ # ke / sp 12 04 05
4
+ #
5
+ # class for input data object
6
+ # offers methods for preprocessing and
7
+ # featurization
8
+
9
+ # Salsa packages
10
+ require "common/Parser"
11
+ require "common/SalsaTigerRegXML"
12
+ require "common/ruby_class_extensions"
13
+
14
+ # Fred/Rosy packages
15
+ require "rosy/FailedParses"
16
+ require "common/RosyConventions"
17
+ require "rosy/RosyFeatureExtractors"
18
+ require "rosy/RosyPhase2FeatureExtractors"
19
+ require "rosy/RosyPruning"
20
+ require "rosy/GfInduceFeature"
21
+ require "common/FixSynSemMapping"
22
+
23
+ class InputData
24
+
25
+ ###
26
+ def initialize(exp_object, # RosyConfigData object
27
+ dataset, # train/test
28
+ feature_info_object, # FeatureInfo object
29
+ interpreter_class, # SynInterpreter class
30
+ input_dir) # Directory with input files
31
+
32
+ @exp = exp_object
33
+ @dataset = dataset
34
+ @interpreter_class = interpreter_class
35
+ @input_dir = input_dir
36
+ # store information about failed parses here
37
+ @failed_parses = FailedParses.new()
38
+
39
+ # feature_extractors_phase1: array of AbstractFeatureExtractor objects
40
+ @extractors_p1_rosy, @extractors_p1_other = feature_info_object.get_extractor_objects("phase 1",
41
+ @interpreter_class)
42
+
43
+ # global settings
44
+ unless RosyFeatureExtractor.set("split_nones" => @exp.get("split_nones"))
45
+ raise "Some grave problem during feature extractor initialization"
46
+ end
47
+
48
+ # # nothing to set here for now, so deactivated
49
+ # @extractors_p1_other.each { |extractor_obj|
50
+ # unless extractor_obj.class.set()
51
+ # raise "Some grave problem during feature extractor initialization"
52
+ # end
53
+ # }
54
+
55
+
56
+ # feature_extractors_phase2: array of AbstractFeatureExtractor objects
57
+ extractors_p2_rosy, extractors_p2_other = feature_info_object.get_extractor_objects("phase 2",
58
+ @interpreter_class)
59
+ @feature_extractors_phase2 = extractors_p2_rosy + extractors_p2_other
60
+ end
61
+
62
+ ###
63
+ # each_instance_phase1()
64
+ #
65
+ # reads the input data from file(s), in the specific input format,
66
+ # separates it into instances,
67
+ # threads it through all phase 1 feature extractors
68
+ # and yields one feature vector per instance
69
+ #
70
+ # yields: an array of pairs [feature_name(string), feature_value(object)]
71
+
72
+ def each_instance_phase1()
73
+ Dir[@input_dir+"*.xml"]. each {|parsefilename|
74
+
75
+ xmlFile = FilePartsParser.new(parsefilename)
76
+ $stderr.puts "Processing #{parsefilename}"
77
+ xmlFile.scan_s {|sent_string|
78
+ sent = SalsaTigerSentence.new(sent_string)
79
+
80
+ # preprocessing: possibly change the SalsaTigerSentence object
81
+ # before featurization
82
+ preprocess(sent)
83
+
84
+ sent.each_frame{ |frame|
85
+
86
+ # skip failed parses
87
+ if sent.get_attribute("failed")
88
+ handle_failed_parse(sent, frame)
89
+ next
90
+ end
91
+
92
+ # Tell feature extractors about the sentence and frame:
93
+ # first Rosy feature extractors, then the others
94
+ # if there is a problem, skip this frame
95
+ unless RosyFeatureExtractor.set_sentence(sent, frame)
96
+ next
97
+ end
98
+ skip_frame = false
99
+ @extractors_p1_other.each { |extractor_obj|
100
+ unless extractor_obj.class.set_sentence(sent, frame)
101
+ skip_frame = true
102
+ break
103
+ end
104
+ }
105
+ if skip_frame
106
+ next
107
+ end
108
+
109
+ sent.each_syn_node { |syn_node|
110
+
111
+ # Tell feature extractors about the current node:
112
+ # first Rosy feature extractors, then the others
113
+ # if there is a problem, skip this node
114
+ unless RosyFeatureExtractor.set_node(syn_node)
115
+ next
116
+ end
117
+ skip_node = false
118
+ @extractors_p1_other.each { |extractor_obj|
119
+ unless extractor_obj.class.set_node(syn_node)
120
+ skip_node = true
121
+ break
122
+ end
123
+ }
124
+ if skip_node
125
+ next
126
+ end
127
+
128
+ # features: array of pairs: [feature_name(string), feature_value(object)]
129
+ features = Array.new
130
+ (@extractors_p1_rosy + @extractors_p1_other).each { |extractor|
131
+ # compute features
132
+ feature_names = extractor.class.feature_names()
133
+ feature_index = 0
134
+
135
+ # append new features to features array
136
+ features.concat extractor.compute_features().map { |feature_value|
137
+ feature_name = feature_names[feature_index]
138
+ feature_index += 1
139
+
140
+ # sanity check: feature value longer than the allotted space in the DB?
141
+ check_feature_length(feature_name, feature_value, extractor)
142
+
143
+ [feature_name, nonnil_feature(feature_value, extractor.class.sql_type()) ]
144
+ }
145
+ }
146
+ yield features
147
+ } # each syn node
148
+ } # each frame
149
+ } # each sentence
150
+ }
151
+ end
152
+
153
+ ###
154
+ # each_phase2_column
155
+ #
156
+ # This method implements the application of the
157
+ # phase 2 extractors to data.
158
+ #
159
+ # Given a database view (of either training or test data),
160
+ # assign a new feature value to each instance
161
+ #
162
+ # yields pairs [feature_name(string), feature_values(array)]
163
+ # The feature_values array has as many lines as the view has instances
164
+ # so the yield of this method can be fed directly into view.update_column()
165
+ def each_phase2_column(view) # View object: training or test data
166
+
167
+ @feature_extractors_phase2.each { |extractor|
168
+ # apply the extractor
169
+ feature_columns = extractor.compute_features_on_view(view)
170
+ # pair each feature name with its column of feature values and yield
171
+ feature_index = 0
172
+ feature_names = extractor.class.feature_names()
173
+ feature_columns.each { |feature_values|
174
+ yield [
175
+ feature_names[feature_index],
176
+ feature_values.map { |feature_val| nonnil_feature(feature_val, extractor.class.sql_type) }
177
+ ]
178
+ feature_index += 1
179
+ }
180
+ }
181
+ end
182
+
183
+ ###
184
+ # get_failed_parses
185
+ #
186
+ # returns the FailedParses object in which the info about failed parses has been stored
187
+ def get_failed_parses()
188
+ return @failed_parses
189
+ end
190
+
191
+ #################################
192
+ private
193
+
194
+
195
+ ###
196
+ def nonnil_feature(feature_value,
197
+ sql_type)
198
+
199
+ # feature value nil? then change to noval
200
+ if feature_value.nil? and sql_type =~ /CHAR/
201
+ return @exp.get("noval")
202
+ elsif feature_value.class.to_s == "String" and feature_value.empty?
203
+ return @exp.get("noval")
204
+ elsif feature_value.nil?
205
+ return 0
206
+ else
207
+ return feature_value
208
+ end
209
+ end
210
+
211
+ ###
212
+ # preprocess: possibly change the given SalsaTigerSentence
213
+ # to enable better learning
214
+ def preprocess(sent) # SalsaTigerSentence object
215
+
216
+
217
+ if @dataset == "train" and
218
+ (@exp.get("fe_syn_repair") or @exp.get("fe_rel_repair"))
219
+ FixSynSemMapping.fixit(sent, @exp, @interpreter_class)
220
+ end
221
+ end
222
+
223
+ ###
224
+ # register failed parses
225
+ def handle_failed_parse(sent, # SalsaTigerSentence object
226
+ frame) # FrameNode
227
+
228
+ # target POS
229
+ if frame.target()
230
+ main_target = @interpreter_class.main_node_of_expr(frame.target.children(), "no_mwe")
231
+ else
232
+ main_target = nil
233
+ end
234
+ if main_target
235
+ target_pos = @interpreter_class.category(main_target)
236
+ else
237
+ target_pos = nil
238
+ end
239
+ if frame.target()
240
+ target_str = frame.target().yield_nodes_ordered().map { |t_node|
241
+ if t_node.is_syntactic?
242
+ @interpreter_class.lemma_backoff(t_node)
243
+ else
244
+ # not a syntactic node: maybe an unassigned target?
245
+ ""
246
+ end
247
+ }.join(" ")
248
+ else
249
+ target_str = ""
250
+ end
251
+
252
+ @failed_parses.register(construct_instance_id(sent.id(), frame.id()),
253
+ frame.name(),
254
+ target_str,
255
+ target_pos,
256
+ frame.children.map { |fe| fe.name })
257
+
258
+ end
259
+
260
+ ###
261
+ # sanity check: feature value longer than the allotted space in the DB?
262
+ def check_feature_length(feature_name, # string
263
+ feature_value, # object
264
+ extractor_obj) # AbstractFeatureExtractor object
265
+
266
+ if extractor_obj.class.sql_type() =~ /(\d+)/
267
+ # sql type contains some statement about the length.
268
+ # just crudely compare to feature length
269
+ length = $1.to_i
270
+ if feature_value.class == String and
271
+ feature_value.length() > length
272
+
273
+ if feature_name == "sentid"
274
+ print length;
275
+ print feature_value;
276
+ print feature_value.length();
277
+ # if the sentence (instance) ID is too long, we cannot go on.
278
+ $stderr.puts "Error: Instance ID is longer than its DB column."
279
+ $stderr.puts "Please increase the DB column size in {Tiger,Collins}FeatureExtractors.rb"
280
+ raise "SQL entry length surpassed"
281
+
282
+ elsif @exp.get("verbose")
283
+ # KE Feb 07: don't print warning,
284
+ # this is just too frequent
285
+ # for other features, we just issue a warning, and only if we are verbose
286
+
287
+ # $stderr.puts "Warning: feature #{feature_name} longer than its DB column (#{length.to_s} vs #{feature_value.length()}): #{feature_value}"
288
+ end # feature name check
289
+ end # length surpassed
290
+ end # length found in sql type
291
+
292
+ end
293
+
294
+ end
@@ -0,0 +1,115 @@
1
+ require 'common/ConfigData'
2
+
3
+ ##############################
4
+ # Class RosyConfigData
5
+ #
6
+ # inherits from ConfigData,
7
+ # sets features for ROSY
8
+
9
+ class RosyConfigData < ConfigData
10
+ def initialize(filename)
11
+ super(filename, # config file
12
+ { # features
13
+ "feature" => "list",
14
+ "classifier" => "list",
15
+
16
+ "verbose" => "bool" ,
17
+ "enduser_mode" => "bool",
18
+
19
+ "experiment_ID" => "string",
20
+
21
+ "directory_input_train" => "string",
22
+ "directory_input_test" => "string",
23
+ "directory_output" => "string",
24
+
25
+ "preproc_descr_file_train" => "string",
26
+ "preproc_descr_file_test" => "string",
27
+ "external_descr_file" => "string",
28
+
29
+ "dbtype" => "string", # "mysql" or "sqlite"
30
+
31
+ "host" => "string", # DB access: sqlite only
32
+ "user" => "string",
33
+ "passwd" => "string",
34
+ "dbname" => "string",
35
+
36
+ "data_dir" => "string", # for external use
37
+ "rosy_dir" => "pattern", # for internal use only, set by rosy.rb
38
+
39
+ "classifier_dir" => "string", # if present, special directory for classifiers
40
+
41
+ "classif_column_name" => "string",
42
+ "main_table_name" => "pattern",
43
+ "test_table_name" => "pattern",
44
+
45
+ "eval_file" => "pattern",
46
+ "log_file" => "pattern",
47
+ "failed_file" => "pattern",
48
+ "classifier_file" => "pattern",
49
+ "classifier_output_file" => "pattern",
50
+ "noval" => "string",
51
+
52
+
53
+ "split_nones" => "bool",
54
+ "print_eval_log" => "bool",
55
+ "assume_argrec_perfect" => "bool",
56
+ "xwise_argrec" => "string",
57
+ "xwise_arglab" => "string",
58
+ "xwise_onestep" => "string",
59
+
60
+ "fe_syn_repair" => "bool", # map words to constituents for FEs: idealize?
61
+ "fe_rel_repair" => "bool", # FEs: include non-included relative clauses into FEs
62
+
63
+ "prune" => "string", # pruning prior to argrec?
64
+
65
+ },
66
+ ["exp_ID", "test_ID", "split_ID", "feature_name", "classif", "step",
67
+ "group", "dataset","mode"] # variables
68
+ )
69
+
70
+ # set access functions for list features
71
+ set_list_feature_access("feature",
72
+ method("access_feature"))
73
+
74
+ # set access functions for list features
75
+ set_list_feature_access("classifier",
76
+ method("access_feature"))
77
+
78
+ end
79
+
80
+ ###
81
+ # protected
82
+
83
+ #####
84
+ # access_feature
85
+ #
86
+ # access function for the list features 'feature' and 'classifier'
87
+ #
88
+ # assumed format in the config file:
89
+ #
90
+ # feature = path [option]*
91
+ #
92
+ # i.e. first the name of the feature type to use, then
93
+ # optionally options associated with that feature,
94
+ # e.g. 'argrec': use that feature only when computing argrec
95
+ #
96
+ # the access function is called with parameter val_list, an array of
97
+ # string tuples, one string tuple for each feature defined.
98
+ # the first string in the tuple is the feature name, the rest are the options
99
+ #
100
+ # returns: a list of pairs [feature_name(string), options(array:string)]
101
+ # of defined features
102
+ def access_feature(val_list) # array:array:string: list of tuples defined in config file
103
+ # for feature 'feature'
104
+ if val_list.nil?
105
+ return []
106
+ else
107
+ return val_list.map { |feature_descr_tuple|
108
+ [feature_descr_tuple.first, feature_descr_tuple[1..-1]]
109
+ }
110
+ end
111
+ end
112
+ end
113
+
114
+
115
+