frprep 0.0.1.prealpha

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (138) hide show
  1. data/.yardopts +8 -0
  2. data/CHANGELOG.rdoc +0 -0
  3. data/LICENSE.rdoc +0 -0
  4. data/README.rdoc +0 -0
  5. data/lib/common/AbstractSynInterface.rb +1227 -0
  6. data/lib/common/BerkeleyInterface.rb +375 -0
  7. data/lib/common/CollinsInterface.rb +1165 -0
  8. data/lib/common/ConfigData.rb +694 -0
  9. data/lib/common/Counter.rb +18 -0
  10. data/lib/common/DBInterface.rb +48 -0
  11. data/lib/common/EnduserMode.rb +27 -0
  12. data/lib/common/Eval.rb +480 -0
  13. data/lib/common/FixSynSemMapping.rb +196 -0
  14. data/lib/common/FrPrepConfigData.rb +66 -0
  15. data/lib/common/FrprepHelper.rb +1324 -0
  16. data/lib/common/Graph.rb +345 -0
  17. data/lib/common/ISO-8859-1.rb +24 -0
  18. data/lib/common/ML.rb +186 -0
  19. data/lib/common/Maxent.rb +215 -0
  20. data/lib/common/MiniparInterface.rb +1388 -0
  21. data/lib/common/Optimise.rb +195 -0
  22. data/lib/common/Parser.rb +213 -0
  23. data/lib/common/RegXML.rb +269 -0
  24. data/lib/common/RosyConventions.rb +171 -0
  25. data/lib/common/SQLQuery.rb +243 -0
  26. data/lib/common/STXmlTerminalOrder.rb +194 -0
  27. data/lib/common/SalsaTigerRegXML.rb +2347 -0
  28. data/lib/common/SalsaTigerXMLHelper.rb +99 -0
  29. data/lib/common/SleepyInterface.rb +384 -0
  30. data/lib/common/SynInterfaces.rb +275 -0
  31. data/lib/common/TabFormat.rb +720 -0
  32. data/lib/common/Tiger.rb +1448 -0
  33. data/lib/common/TntInterface.rb +44 -0
  34. data/lib/common/Tree.rb +61 -0
  35. data/lib/common/TreetaggerInterface.rb +303 -0
  36. data/lib/common/headz.rb +338 -0
  37. data/lib/common/option_parser.rb +13 -0
  38. data/lib/common/ruby_class_extensions.rb +310 -0
  39. data/lib/fred/Baseline.rb +150 -0
  40. data/lib/fred/FileZipped.rb +31 -0
  41. data/lib/fred/FredBOWContext.rb +863 -0
  42. data/lib/fred/FredConfigData.rb +182 -0
  43. data/lib/fred/FredConventions.rb +232 -0
  44. data/lib/fred/FredDetermineTargets.rb +324 -0
  45. data/lib/fred/FredEval.rb +312 -0
  46. data/lib/fred/FredFeatureExtractors.rb +321 -0
  47. data/lib/fred/FredFeatures.rb +1061 -0
  48. data/lib/fred/FredFeaturize.rb +596 -0
  49. data/lib/fred/FredNumTrainingSenses.rb +27 -0
  50. data/lib/fred/FredParameters.rb +402 -0
  51. data/lib/fred/FredSplit.rb +84 -0
  52. data/lib/fred/FredSplitPkg.rb +180 -0
  53. data/lib/fred/FredTest.rb +607 -0
  54. data/lib/fred/FredTrain.rb +144 -0
  55. data/lib/fred/PlotAndREval.rb +480 -0
  56. data/lib/fred/fred.rb +45 -0
  57. data/lib/fred/md5.rb +23 -0
  58. data/lib/fred/opt_parser.rb +250 -0
  59. data/lib/frprep/AbstractSynInterface.rb +1227 -0
  60. data/lib/frprep/Ampersand.rb +37 -0
  61. data/lib/frprep/BerkeleyInterface.rb +375 -0
  62. data/lib/frprep/CollinsInterface.rb +1165 -0
  63. data/lib/frprep/ConfigData.rb +694 -0
  64. data/lib/frprep/Counter.rb +18 -0
  65. data/lib/frprep/FNCorpusXML.rb +643 -0
  66. data/lib/frprep/FNDatabase.rb +144 -0
  67. data/lib/frprep/FixSynSemMapping.rb +196 -0
  68. data/lib/frprep/FrPrepConfigData.rb +66 -0
  69. data/lib/frprep/FrameXML.rb +513 -0
  70. data/lib/frprep/FrprepHelper.rb +1324 -0
  71. data/lib/frprep/Graph.rb +345 -0
  72. data/lib/frprep/ISO-8859-1.rb +24 -0
  73. data/lib/frprep/MiniparInterface.rb +1388 -0
  74. data/lib/frprep/Parser.rb +213 -0
  75. data/lib/frprep/RegXML.rb +269 -0
  76. data/lib/frprep/STXmlTerminalOrder.rb +194 -0
  77. data/lib/frprep/SalsaTigerRegXML.rb +2347 -0
  78. data/lib/frprep/SalsaTigerXMLHelper.rb +99 -0
  79. data/lib/frprep/SleepyInterface.rb +384 -0
  80. data/lib/frprep/SynInterfaces.rb +275 -0
  81. data/lib/frprep/TabFormat.rb +720 -0
  82. data/lib/frprep/Tiger.rb +1448 -0
  83. data/lib/frprep/TntInterface.rb +44 -0
  84. data/lib/frprep/Tree.rb +61 -0
  85. data/lib/frprep/TreetaggerInterface.rb +303 -0
  86. data/lib/frprep/do_parses.rb +142 -0
  87. data/lib/frprep/frprep.rb +686 -0
  88. data/lib/frprep/headz.rb +338 -0
  89. data/lib/frprep/one_parsed_file.rb +28 -0
  90. data/lib/frprep/opt_parser.rb +94 -0
  91. data/lib/frprep/ruby_class_extensions.rb +310 -0
  92. data/lib/rosy/AbstractFeatureAndExternal.rb +240 -0
  93. data/lib/rosy/DBMySQL.rb +146 -0
  94. data/lib/rosy/DBSQLite.rb +280 -0
  95. data/lib/rosy/DBTable.rb +239 -0
  96. data/lib/rosy/DBWrapper.rb +176 -0
  97. data/lib/rosy/ExternalConfigData.rb +58 -0
  98. data/lib/rosy/FailedParses.rb +130 -0
  99. data/lib/rosy/FeatureInfo.rb +242 -0
  100. data/lib/rosy/GfInduce.rb +1115 -0
  101. data/lib/rosy/GfInduceFeature.rb +148 -0
  102. data/lib/rosy/InputData.rb +294 -0
  103. data/lib/rosy/RosyConfigData.rb +115 -0
  104. data/lib/rosy/RosyConfusability.rb +338 -0
  105. data/lib/rosy/RosyEval.rb +465 -0
  106. data/lib/rosy/RosyFeatureExtractors.rb +1609 -0
  107. data/lib/rosy/RosyFeaturize.rb +280 -0
  108. data/lib/rosy/RosyInspect.rb +336 -0
  109. data/lib/rosy/RosyIterator.rb +477 -0
  110. data/lib/rosy/RosyPhase2FeatureExtractors.rb +230 -0
  111. data/lib/rosy/RosyPruning.rb +165 -0
  112. data/lib/rosy/RosyServices.rb +744 -0
  113. data/lib/rosy/RosySplit.rb +232 -0
  114. data/lib/rosy/RosyTask.rb +19 -0
  115. data/lib/rosy/RosyTest.rb +826 -0
  116. data/lib/rosy/RosyTrain.rb +232 -0
  117. data/lib/rosy/RosyTrainingTestTable.rb +786 -0
  118. data/lib/rosy/TargetsMostFrequentFrame.rb +60 -0
  119. data/lib/rosy/View.rb +418 -0
  120. data/lib/rosy/opt_parser.rb +379 -0
  121. data/lib/rosy/rosy.rb +77 -0
  122. data/lib/shalmaneser/version.rb +3 -0
  123. data/test/frprep/test_opt_parser.rb +94 -0
  124. data/test/functional/functional_test_helper.rb +40 -0
  125. data/test/functional/sample_experiment_files/fred_test.salsa.erb +122 -0
  126. data/test/functional/sample_experiment_files/fred_train.salsa.erb +135 -0
  127. data/test/functional/sample_experiment_files/prp_test.salsa.erb +138 -0
  128. data/test/functional/sample_experiment_files/prp_test.salsa.fred.standalone.erb +120 -0
  129. data/test/functional/sample_experiment_files/prp_test.salsa.rosy.standalone.erb +120 -0
  130. data/test/functional/sample_experiment_files/prp_train.salsa.erb +138 -0
  131. data/test/functional/sample_experiment_files/prp_train.salsa.fred.standalone.erb +138 -0
  132. data/test/functional/sample_experiment_files/prp_train.salsa.rosy.standalone.erb +138 -0
  133. data/test/functional/sample_experiment_files/rosy_test.salsa.erb +257 -0
  134. data/test/functional/sample_experiment_files/rosy_train.salsa.erb +259 -0
  135. data/test/functional/test_fred.rb +47 -0
  136. data/test/functional/test_frprep.rb +52 -0
  137. data/test/functional/test_rosy.rb +20 -0
  138. metadata +270 -0
@@ -0,0 +1,148 @@
1
+ # GfInduceFeature
2
+ # Katrin Erk Jan 06
3
+ #
4
+ # use result of GfInduce.rb as
5
+ # feature for Rosy
6
+
7
+ require "rosy/GfInduce"
8
+ require "rosy/AbstractFeatureAndExternal"
9
+ require "common/ruby_class_extensions"
10
+
11
###
# Build the path of the GfInduce pickle file.
#
# exp:         ExternalConfigData object, queried via get()
# interpreter: SynInterpreter class, queried via systems() and optional_systems()
#
# The output directory is the "directory" entry of the experiment file.
# If the experiment file also has an "experiment_id", a subdirectory
# named after that ID is used instead.
# (File.new_dir is not core Ruby; presumably it comes from
# common/ruby_class_extensions and returns the directory path
# including a trailing separator -- confirm.)
#
# File name pattern:
#   Gfmap.{<service>=<system_name>.}*{OPT<service>=<system_name>.}*pkl
#
# returns: output directory immediately followed by the file name (string)
def filename_gfmap(exp, interpreter)
  output_dir = File.new_dir(exp.get("directory"))
  output_dir = File.new_dir(output_dir, exp.get("experiment_id")) if exp.get("experiment_id")

  # one "<service>=<system_name>" entry per mandatory system
  required = interpreter.systems.to_a.map do |service, system_name|
    "#{service}=#{system_name}"
  end

  # optional systems are marked with an "OPT" prefix
  optional = interpreter.optional_systems.to_a.map do |service, system_name|
    "OPT#{service}=#{system_name}"
  end

  # both lists are sorted so that the name does not depend on hash order
  "#{output_dir}Gfmap.#{required.sort.join('.')}.#{optional.sort.join('.')}.pkl"
end
35
+
36
################################
# Feature extractor that exposes the result of GfInduce as the
# Rosy feature "gf_fn".
# (The original header calls this the "base class for all following
# feature extractors".)
#
# All state lives in class variables, so it is shared by every
# instance (and, as with any Ruby class variable, with the rest of
# the class hierarchy). @@node and @@interpreter_class are not set
# in this class; presumably a superclass sets them -- confirm.
class GfInduceFeatureExtractor < ExternalFeatureExtractor
  # announce_me is inherited; presumably it registers this extractor -- confirm
  GfInduceFeatureExtractor.announce_me()

  @@okay = true       # external experiment file present?
  @@gf_obj = nil      # GfInduce object
  @@node_to_gf = nil  # Hash: SynNodes of a sentence -> Gf label

  # name by which this extractor is referred to
  def self.designator
    "gf_fn"
  end

  # names of the features computed, in the order used by compute_features()
  def self.feature_names
    ["gf_fn"]
  end

  # SQL column type for the feature values
  def self.sql_type
    "VARCHAR(25)"
  end

  def self.feature_type
    "syn"
  end

  def self.phase
    "phase 1"
  end

  ###
  # set sentence, set node, set other settings:
  # this is done prior to feature computation using compute_features()
  # such that computations that stay the same for
  # several features can be done in advance
  #
  # This is just relevant for Phase 1
  #
  # sent:  SalsaTigerSentence object
  # frame: FrameNode object
  #
  # returns: true. A missing external experiment file (@@okay false) is
  # not reported as a problem: compute_features() then yields [nil].
  # Previously the method fell off the end of the "if" and returned nil
  # in that case, which callers that test the result (they skip the
  # frame on false/nil) would have read as a failure for every frame.
  # The return value of super is ignored, as before.
  def self.set_sentence(sent, frame)
    super(sent, frame)

    if @@okay
      # let the GF object compute all subcat frames
      # for the target of this frame
      subcatframes_of_current_target = @@gf_obj.apply(frame.target.children)

      # subcatframes_of_current_target contains triples
      # [frame, actual_subcatframe, frequency].
      # Keep just the actual_subcatframe of the most frequent triple,
      # or an empty subcat frame if none was returned.
      subcatframe =
        if subcatframes_of_current_target.empty?
          []
        else
          subcatframes_of_current_target.sort { |a, b|
            # sort by frequency, highest first
            b.last <=> a.last
          }.first[1]
        end

      # change into a mapping node(SynNode) -> GF(string)
      @@node_to_gf = {}
      subcatframe.each do |gf, prep, _fe, synnodes|
        synnodes.each do |node|
          @@node_to_gf[node] = "#{gf} #{prep}"
        end
      end
    end

    true
  end

  ###
  # Initialize: read GFInduce pickle
  #
  # exp:               experiment file object
  # interpreter_class: SynInterpreter class
  #
  # @exp_external is not assigned here; presumably super sets it to the
  # external experiment file object, or leaves it nil -- confirm.
  def initialize(exp, interpreter_class)
    super(exp, interpreter_class)

    if @exp_external
      pickle_filename = filename_gfmap(@exp_external, @@interpreter_class)
      @@gf_obj = GfInduce.from_file(pickle_filename)
      @@okay = true
    else
      # signal that you cannot compute anything
      @@okay = false
    end
  end

  ###
  # compute: compute features
  #
  # returns an array of features (strings), length the same as the
  # length of feature_names()
  #
  # here: array of length one, content either a string or nil
  def compute_features
    # current node: @@node
    # check whether the current node has been assigned a slot
    # in the subcat frame
    if @@okay
      [@@node_to_gf[@@node]]
    else
      [nil]
    end
  end
end
@@ -0,0 +1,294 @@
1
+ ###########
2
+ #
3
+ # ke / sp 12 04 05
4
+ #
5
+ # class for input data object
6
+ # offers methods for preprocessing and
7
+ # featurization
8
+
9
+ # Salsa packages
10
+ require "common/Parser"
11
+ require "common/SalsaTigerRegXML"
12
+ require "common/ruby_class_extensions"
13
+
14
+ # Fred/Rosy packages
15
+ require "rosy/FailedParses"
16
+ require "common/RosyConventions"
17
+ require "rosy/RosyFeatureExtractors"
18
+ require "rosy/RosyPhase2FeatureExtractors"
19
+ require "rosy/RosyPruning"
20
+ require "rosy/GfInduceFeature"
21
+ require "common/FixSynSemMapping"
22
+
23
# Input data object: reads SalsaTigerXML input files, preprocesses the
# sentences and runs them through the phase 1 and phase 2 feature
# extractors.
class InputData

  ###
  # exp_object:          RosyConfigData object
  # dataset:             train/test
  # feature_info_object: FeatureInfo object
  # interpreter_class:   SynInterpreter class
  # input_dir:           Directory with input files; "*.xml" is appended
  #                      directly, so it has to end in a path separator
  #
  # raises RuntimeError if RosyFeatureExtractor.set reports a problem
  def initialize(exp_object, dataset, feature_info_object, interpreter_class, input_dir)
    @exp = exp_object
    @dataset = dataset
    @interpreter_class = interpreter_class
    @input_dir = input_dir
    # store information about failed parses here
    @failed_parses = FailedParses.new

    # feature_extractors_phase1: array of AbstractFeatureExtractor objects
    @extractors_p1_rosy, @extractors_p1_other =
      feature_info_object.get_extractor_objects("phase 1", @interpreter_class)

    # global settings
    unless RosyFeatureExtractor.set("split_nones" => @exp.get("split_nones"))
      raise "Some grave problem during feature extractor initialization"
    end

    # # nothing to set here for now, so deactivated
    # @extractors_p1_other.each { |extractor_obj|
    #   unless extractor_obj.class.set()
    #     raise "Some grave problem during feature extractor initialization"
    #   end
    # }

    # feature_extractors_phase2: array of AbstractFeatureExtractor objects
    extractors_p2_rosy, extractors_p2_other =
      feature_info_object.get_extractor_objects("phase 2", @interpreter_class)
    @feature_extractors_phase2 = extractors_p2_rosy + extractors_p2_other
  end

  ###
  # each_instance_phase1()
  #
  # reads the input data from file(s), in the specific input format,
  # separates it into instances,
  # threads it through all phase 1 feature extractors
  # and yields one feature vector per instance
  #
  # yields: one array per instance, consisting of pairs
  #         [feature_name(string), feature_value(object)]
  #
  # returns an Enumerator when called without a block
  def each_instance_phase1
    return enum_for(:each_instance_phase1) unless block_given?

    Dir[@input_dir + "*.xml"].each do |parsefilename|
      xml_file = FilePartsParser.new(parsefilename)
      $stderr.puts "Processing #{parsefilename}"

      xml_file.scan_s do |sent_string|
        sent = SalsaTigerSentence.new(sent_string)

        # preprocessing: possibly change the SalsaTigerSentence object
        # before featurization
        preprocess(sent)

        sent.each_frame do |frame|
          # skip failed parses, but keep a record of them
          if sent.get_attribute("failed")
            handle_failed_parse(sent, frame)
            next
          end

          # Tell feature extractors about the sentence and frame:
          # first Rosy feature extractors, then the others.
          # If there is a problem, skip this frame.
          # (all? stops at the first extractor that reports a problem)
          next unless RosyFeatureExtractor.set_sentence(sent, frame)
          next unless @extractors_p1_other.all? { |extractor_obj|
            extractor_obj.class.set_sentence(sent, frame)
          }

          sent.each_syn_node do |syn_node|
            # Tell feature extractors about the current node:
            # first Rosy feature extractors, then the others.
            # If there is a problem, skip this node.
            next unless RosyFeatureExtractor.set_node(syn_node)
            next unless @extractors_p1_other.all? { |extractor_obj|
              extractor_obj.class.set_node(syn_node)
            }

            yield phase1_features
          end # each syn node
        end # each frame
      end # each sentence
    end
  end

  ###
  # each_phase2_column
  #
  # This method implements the application of the
  # phase 2 extractors to data.
  #
  # Given a database view (of either training or test data),
  # assign a new feature value to each instance
  #
  # yields pairs [feature_name(string), feature_values(array)]
  # The feature_values array has as many lines as the view has instances
  # so the yield of this method can be fed directly into view.update_column()
  #
  # returns an Enumerator when called without a block
  def each_phase2_column(view) # View object: training or test data
    return enum_for(:each_phase2_column, view) unless block_given?

    @feature_extractors_phase2.each do |extractor|
      # apply the extractor
      feature_columns = extractor.compute_features_on_view(view)
      # interleave with feature names and yield
      feature_names = extractor.class.feature_names
      feature_index = 0
      feature_columns.each do |feature_values|
        yield [
          feature_names[feature_index],
          feature_values.map { |feature_val| nonnil_feature(feature_val, extractor.class.sql_type) }
        ]
        feature_index += 1
      end
    end
  end

  ###
  # get_failed_parses
  #
  # returns the FailedParses object in which the info about failed parses has been stored
  def get_failed_parses
    @failed_parses
  end

  #################################
  private

  ###
  # Run all phase 1 extractors (Rosy extractors first, then the others)
  # on the sentence/frame/node they have currently been set to.
  #
  # returns: array of pairs [feature_name(string), feature_value(object)]
  def phase1_features
    (@extractors_p1_rosy + @extractors_p1_other).flat_map do |extractor|
      feature_names = extractor.class.feature_names

      extractor.compute_features.each_with_index.map do |feature_value, feature_index|
        feature_name = feature_names[feature_index]

        # sanity check: feature value longer than the allotted space in the DB?
        check_feature_length(feature_name, feature_value, extractor)

        [feature_name, nonnil_feature(feature_value, extractor.class.sql_type)]
      end
    end
  end

  ###
  # Replace missing feature values by something storable:
  # - nil in a column whose SQL type contains "CHAR" -> "noval" setting
  # - empty string                                  -> "noval" setting
  # - nil otherwise                                 -> 0
  # All other values are returned unchanged.
  def nonnil_feature(feature_value, sql_type)
    if feature_value.nil?
      sql_type =~ /CHAR/ ? @exp.get("noval") : 0
    elsif feature_value.is_a?(String) && feature_value.empty?
      @exp.get("noval")
    else
      feature_value
    end
  end

  ###
  # preprocess: possibly change the given SalsaTigerSentence
  # to enable better learning
  #
  # Only training data is changed, and only if "fe_syn_repair" or
  # "fe_rel_repair" is set in the experiment file.
  def preprocess(sent) # SalsaTigerSentence object
    return unless @dataset == "train"
    return unless @exp.get("fe_syn_repair") || @exp.get("fe_rel_repair")

    FixSynSemMapping.fixit(sent, @exp, @interpreter_class)
  end

  ###
  # register failed parses
  #
  # Records instance ID, frame name, target string, target POS and the
  # names of the frame's children in @failed_parses.
  def handle_failed_parse(sent,  # SalsaTigerSentence object
                          frame) # FrameNode
    target = frame.target

    # target POS: category of the main target node, nil if there is none
    main_target =
      if target
        @interpreter_class.main_node_of_expr(target.children, "no_mwe")
      end
    target_pos = main_target ? @interpreter_class.category(main_target) : nil

    # target string: lemmas of the target nodes in order, "" without target
    target_str =
      if target
        target.yield_nodes_ordered.map { |t_node|
          if t_node.is_syntactic?
            @interpreter_class.lemma_backoff(t_node)
          else
            # not a syntactic node: maybe an unassigned target?
            ""
          end
        }.join(" ")
      else
        ""
      end

    @failed_parses.register(construct_instance_id(sent.id, frame.id),
                            frame.name,
                            target_str,
                            target_pos,
                            frame.children.map { |fe| fe.name })
  end

  ###
  # sanity check: feature value longer than the allotted space in the DB?
  #
  # The allotted length is the first number found in the extractor's
  # SQL type; without such a number nothing is checked.
  #
  # An over-long instance ID (feature "sentid") is fatal: details are
  # written to stderr and a RuntimeError is raised. Over-long values of
  # other features are ignored on purpose (KE Feb 07: a warning here was
  # just too frequent).
  #
  # returns: nil
  def check_feature_length(feature_name,  # string
                           feature_value, # object
                           extractor_obj) # AbstractFeatureExtractor object
    # sql type contains some statement about the length?
    return unless extractor_obj.class.sql_type =~ /(\d+)/

    # just crudely compare to feature length
    length = Regexp.last_match(1).to_i
    return unless feature_value.is_a?(String) && feature_value.length > length
    return unless feature_name == "sentid"

    # if the sentence (instance) ID is too long, we cannot go on.
    $stderr.puts "Error: Instance ID is longer than its DB column."
    $stderr.puts "Instance ID #{feature_value.inspect} has #{feature_value.length} characters, " \
                 "the DB column allows #{length}."
    $stderr.puts "Please increase the DB column size in {Tiger,Collins}FeatureExtractors.rb"
    raise "SQL entry length surpassed"
  end
end
@@ -0,0 +1,115 @@
1
+ require 'common/ConfigData'
2
+
3
##############################
# Class RosyConfigData
#
# ConfigData subclass for ROSY experiment files:
# declares the features ROSY knows about, the variables usable in
# pattern-valued features, and the access functions for the list
# features "feature" and "classifier".

class RosyConfigData < ConfigData
  # filename: config file, passed on to ConfigData together with the
  # feature declarations and the variable names
  def initialize(filename)
    # feature name => feature type
    features = {
      "feature" => "list",
      "classifier" => "list",

      "verbose" => "bool",
      "enduser_mode" => "bool",

      "experiment_ID" => "string",

      "directory_input_train" => "string",
      "directory_input_test" => "string",
      "directory_output" => "string",

      "preproc_descr_file_train" => "string",
      "preproc_descr_file_test" => "string",
      "external_descr_file" => "string",

      "dbtype" => "string", # "mysql" or "sqlite"

      # DB access. The original comment says "sqlite only".
      # NOTE(review): host/user/passwd look like MySQL connection
      # settings -- confirm which dbtype actually reads them.
      "host" => "string",
      "user" => "string",
      "passwd" => "string",
      "dbname" => "string",

      "data_dir" => "string",  # for external use
      "rosy_dir" => "pattern", # for internal use only, set by rosy.rb

      "classifier_dir" => "string", # if present, special directory for classifiers

      "classif_column_name" => "string",
      "main_table_name" => "pattern",
      "test_table_name" => "pattern",

      "eval_file" => "pattern",
      "log_file" => "pattern",
      "failed_file" => "pattern",
      "classifier_file" => "pattern",
      "classifier_output_file" => "pattern",
      "noval" => "string",

      "split_nones" => "bool",
      "print_eval_log" => "bool",
      "assume_argrec_perfect" => "bool",
      "xwise_argrec" => "string",
      "xwise_arglab" => "string",
      "xwise_onestep" => "string",

      "fe_syn_repair" => "bool", # map words to constituents for FEs: idealize?
      "fe_rel_repair" => "bool", # FEs: include non-included relative clauses into FEs

      "prune" => "string" # pruning prior to argrec?
    }

    # variables
    variables = ["exp_ID", "test_ID", "split_ID", "feature_name", "classif",
                 "step", "group", "dataset", "mode"]

    super(filename, features, variables)

    # both list features share the same access function
    ["feature", "classifier"].each do |list_feature|
      set_list_feature_access(list_feature, method("access_feature"))
    end
  end

  ###
  # protected

  #####
  # access_feature
  #
  # access function for the list features
  # (registered above for 'feature' and 'classifier')
  #
  # assumed format in the config file:
  #
  # feature = path [option]*
  #
  # i.e. first the name of the feature type to use, then
  # optionally options associated with that feature,
  # e.g. 'argrec': use that feature only when computing argrec
  #
  # val_list: array of string tuples (array:array:string), one tuple
  # per definition in the config file; the first string of a tuple is
  # the feature name, the rest are its options. May be nil.
  #
  # returns: a list of pairs [feature_name(string), options(array:string)]
  # of defined features; empty list if val_list is nil
  def access_feature(val_list)
    return [] if val_list.nil?

    val_list.map do |feature_descr_tuple|
      name = feature_descr_tuple.first
      options = feature_descr_tuple[1..-1]
      [name, options]
    end
  end
end
113
+
114
+
115
+