frprep 0.0.1.prealpha

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (138) hide show
  1. data/.yardopts +8 -0
  2. data/CHANGELOG.rdoc +0 -0
  3. data/LICENSE.rdoc +0 -0
  4. data/README.rdoc +0 -0
  5. data/lib/common/AbstractSynInterface.rb +1227 -0
  6. data/lib/common/BerkeleyInterface.rb +375 -0
  7. data/lib/common/CollinsInterface.rb +1165 -0
  8. data/lib/common/ConfigData.rb +694 -0
  9. data/lib/common/Counter.rb +18 -0
  10. data/lib/common/DBInterface.rb +48 -0
  11. data/lib/common/EnduserMode.rb +27 -0
  12. data/lib/common/Eval.rb +480 -0
  13. data/lib/common/FixSynSemMapping.rb +196 -0
  14. data/lib/common/FrPrepConfigData.rb +66 -0
  15. data/lib/common/FrprepHelper.rb +1324 -0
  16. data/lib/common/Graph.rb +345 -0
  17. data/lib/common/ISO-8859-1.rb +24 -0
  18. data/lib/common/ML.rb +186 -0
  19. data/lib/common/Maxent.rb +215 -0
  20. data/lib/common/MiniparInterface.rb +1388 -0
  21. data/lib/common/Optimise.rb +195 -0
  22. data/lib/common/Parser.rb +213 -0
  23. data/lib/common/RegXML.rb +269 -0
  24. data/lib/common/RosyConventions.rb +171 -0
  25. data/lib/common/SQLQuery.rb +243 -0
  26. data/lib/common/STXmlTerminalOrder.rb +194 -0
  27. data/lib/common/SalsaTigerRegXML.rb +2347 -0
  28. data/lib/common/SalsaTigerXMLHelper.rb +99 -0
  29. data/lib/common/SleepyInterface.rb +384 -0
  30. data/lib/common/SynInterfaces.rb +275 -0
  31. data/lib/common/TabFormat.rb +720 -0
  32. data/lib/common/Tiger.rb +1448 -0
  33. data/lib/common/TntInterface.rb +44 -0
  34. data/lib/common/Tree.rb +61 -0
  35. data/lib/common/TreetaggerInterface.rb +303 -0
  36. data/lib/common/headz.rb +338 -0
  37. data/lib/common/option_parser.rb +13 -0
  38. data/lib/common/ruby_class_extensions.rb +310 -0
  39. data/lib/fred/Baseline.rb +150 -0
  40. data/lib/fred/FileZipped.rb +31 -0
  41. data/lib/fred/FredBOWContext.rb +863 -0
  42. data/lib/fred/FredConfigData.rb +182 -0
  43. data/lib/fred/FredConventions.rb +232 -0
  44. data/lib/fred/FredDetermineTargets.rb +324 -0
  45. data/lib/fred/FredEval.rb +312 -0
  46. data/lib/fred/FredFeatureExtractors.rb +321 -0
  47. data/lib/fred/FredFeatures.rb +1061 -0
  48. data/lib/fred/FredFeaturize.rb +596 -0
  49. data/lib/fred/FredNumTrainingSenses.rb +27 -0
  50. data/lib/fred/FredParameters.rb +402 -0
  51. data/lib/fred/FredSplit.rb +84 -0
  52. data/lib/fred/FredSplitPkg.rb +180 -0
  53. data/lib/fred/FredTest.rb +607 -0
  54. data/lib/fred/FredTrain.rb +144 -0
  55. data/lib/fred/PlotAndREval.rb +480 -0
  56. data/lib/fred/fred.rb +45 -0
  57. data/lib/fred/md5.rb +23 -0
  58. data/lib/fred/opt_parser.rb +250 -0
  59. data/lib/frprep/AbstractSynInterface.rb +1227 -0
  60. data/lib/frprep/Ampersand.rb +37 -0
  61. data/lib/frprep/BerkeleyInterface.rb +375 -0
  62. data/lib/frprep/CollinsInterface.rb +1165 -0
  63. data/lib/frprep/ConfigData.rb +694 -0
  64. data/lib/frprep/Counter.rb +18 -0
  65. data/lib/frprep/FNCorpusXML.rb +643 -0
  66. data/lib/frprep/FNDatabase.rb +144 -0
  67. data/lib/frprep/FixSynSemMapping.rb +196 -0
  68. data/lib/frprep/FrPrepConfigData.rb +66 -0
  69. data/lib/frprep/FrameXML.rb +513 -0
  70. data/lib/frprep/FrprepHelper.rb +1324 -0
  71. data/lib/frprep/Graph.rb +345 -0
  72. data/lib/frprep/ISO-8859-1.rb +24 -0
  73. data/lib/frprep/MiniparInterface.rb +1388 -0
  74. data/lib/frprep/Parser.rb +213 -0
  75. data/lib/frprep/RegXML.rb +269 -0
  76. data/lib/frprep/STXmlTerminalOrder.rb +194 -0
  77. data/lib/frprep/SalsaTigerRegXML.rb +2347 -0
  78. data/lib/frprep/SalsaTigerXMLHelper.rb +99 -0
  79. data/lib/frprep/SleepyInterface.rb +384 -0
  80. data/lib/frprep/SynInterfaces.rb +275 -0
  81. data/lib/frprep/TabFormat.rb +720 -0
  82. data/lib/frprep/Tiger.rb +1448 -0
  83. data/lib/frprep/TntInterface.rb +44 -0
  84. data/lib/frprep/Tree.rb +61 -0
  85. data/lib/frprep/TreetaggerInterface.rb +303 -0
  86. data/lib/frprep/do_parses.rb +142 -0
  87. data/lib/frprep/frprep.rb +686 -0
  88. data/lib/frprep/headz.rb +338 -0
  89. data/lib/frprep/one_parsed_file.rb +28 -0
  90. data/lib/frprep/opt_parser.rb +94 -0
  91. data/lib/frprep/ruby_class_extensions.rb +310 -0
  92. data/lib/rosy/AbstractFeatureAndExternal.rb +240 -0
  93. data/lib/rosy/DBMySQL.rb +146 -0
  94. data/lib/rosy/DBSQLite.rb +280 -0
  95. data/lib/rosy/DBTable.rb +239 -0
  96. data/lib/rosy/DBWrapper.rb +176 -0
  97. data/lib/rosy/ExternalConfigData.rb +58 -0
  98. data/lib/rosy/FailedParses.rb +130 -0
  99. data/lib/rosy/FeatureInfo.rb +242 -0
  100. data/lib/rosy/GfInduce.rb +1115 -0
  101. data/lib/rosy/GfInduceFeature.rb +148 -0
  102. data/lib/rosy/InputData.rb +294 -0
  103. data/lib/rosy/RosyConfigData.rb +115 -0
  104. data/lib/rosy/RosyConfusability.rb +338 -0
  105. data/lib/rosy/RosyEval.rb +465 -0
  106. data/lib/rosy/RosyFeatureExtractors.rb +1609 -0
  107. data/lib/rosy/RosyFeaturize.rb +280 -0
  108. data/lib/rosy/RosyInspect.rb +336 -0
  109. data/lib/rosy/RosyIterator.rb +477 -0
  110. data/lib/rosy/RosyPhase2FeatureExtractors.rb +230 -0
  111. data/lib/rosy/RosyPruning.rb +165 -0
  112. data/lib/rosy/RosyServices.rb +744 -0
  113. data/lib/rosy/RosySplit.rb +232 -0
  114. data/lib/rosy/RosyTask.rb +19 -0
  115. data/lib/rosy/RosyTest.rb +826 -0
  116. data/lib/rosy/RosyTrain.rb +232 -0
  117. data/lib/rosy/RosyTrainingTestTable.rb +786 -0
  118. data/lib/rosy/TargetsMostFrequentFrame.rb +60 -0
  119. data/lib/rosy/View.rb +418 -0
  120. data/lib/rosy/opt_parser.rb +379 -0
  121. data/lib/rosy/rosy.rb +77 -0
  122. data/lib/shalmaneser/version.rb +3 -0
  123. data/test/frprep/test_opt_parser.rb +94 -0
  124. data/test/functional/functional_test_helper.rb +40 -0
  125. data/test/functional/sample_experiment_files/fred_test.salsa.erb +122 -0
  126. data/test/functional/sample_experiment_files/fred_train.salsa.erb +135 -0
  127. data/test/functional/sample_experiment_files/prp_test.salsa.erb +138 -0
  128. data/test/functional/sample_experiment_files/prp_test.salsa.fred.standalone.erb +120 -0
  129. data/test/functional/sample_experiment_files/prp_test.salsa.rosy.standalone.erb +120 -0
  130. data/test/functional/sample_experiment_files/prp_train.salsa.erb +138 -0
  131. data/test/functional/sample_experiment_files/prp_train.salsa.fred.standalone.erb +138 -0
  132. data/test/functional/sample_experiment_files/prp_train.salsa.rosy.standalone.erb +138 -0
  133. data/test/functional/sample_experiment_files/rosy_test.salsa.erb +257 -0
  134. data/test/functional/sample_experiment_files/rosy_train.salsa.erb +259 -0
  135. data/test/functional/test_fred.rb +47 -0
  136. data/test/functional/test_frprep.rb +52 -0
  137. data/test/functional/test_rosy.rb +20 -0
  138. metadata +270 -0
@@ -0,0 +1,345 @@
1
# GraphNode: describes one node in a graph.
#
# A node may have an arbitrary number of parents (sources of incoming edges)
# and an arbitrary number of children (targets of outgoing edges).
#
# All edges are labeled and directed. Internally every edge is stored as a
# two-element array [edgelabel, node].
#
# The add_parent, add_child, remove_parent, remove_child methods
# take care of both ends of an edge
# (i.e. n1.add_child(n2, label) also adds n1 as parent of n2 with edge
# label 'label').
#
# It is possible to create a 'pointer' rather than an edge:
#   n1.add_child(n2, label, "pointer_insteadof_edge" => true)
# will create an edge from n1 to n2 labeled 'label' that is
# listed under the outgoing edges of n1, but not among
# the incoming edges of n2.
# The same option is available for add_parent, remove_parent, remove_child.
# Note that the option key is the String "pointer_insteadof_edge".
class GraphNode

  # Separators used by the custom Marshal format (see _dump).
  FIELD_SEP = "QQSEPVALUESQQ"  # between id, features, children, parents
  EDGE_SEP  = "QQPAIRQQ"       # between two edges of the same list
  PAIR_SEP  = "QQSEPQQ"        # between the label and the node id of one edge

  # id: identifier of the node; two nodes with the same id compare equal.
  def initialize(id)
    @id = id
    @children = []
    @parents = []
    @features = {}
  end

  ###
  # Marshalling
  #
  # Dump just IDs instead of actual nodes from the parents and children
  # lists. Otherwise the Marshaller would follow all links to the objects
  # mentioned. After loading, the caller has to replace the IDs by actual
  # objects via recover_from_dump.

  # Called by Marshal.dump. Returns a String with four fields separated by
  # FIELD_SEP: the id, the marshalled feature hash, the child edges and the
  # parent edges. Ids and labels are converted with to_s, so they come back
  # as Strings after loading.
  def _dump(depth)
    @id.to_s +
      FIELD_SEP +
      Marshal.dump(@features) +
      FIELD_SEP +
      dump_edges(@children) +
      FIELD_SEP +
      dump_edges(@parents)
  end

  # Called by Marshal.load. Returns a GraphNode whose edge lists contain
  # node ids (Strings) instead of nodes until recover_from_dump is called.
  def GraphNode._load(string)
    id = string.split(FIELD_SEP).first

    result = GraphNode.new(id)
    result.fill_from_pickle(string)
    return result
  end

  # Restore features and (id-based) edge lists from a String made by _dump.
  #
  # NOTE: this uses Marshal.load on part of the string, so it must only be
  # fed data from a trusted source.
  def fill_from_pickle(string)
    _id, features_s, children_s, parents_s = string.split(FIELD_SEP)

    @features = Marshal.load(features_s)
    @children = load_edges(children_s)
    @parents = load_edges(parents_s)
  end

  # Replace the node ids left in the edge lists by _load with actual objects.
  #
  # node_by_id: an object responding to call(id) that returns the node
  #             (or whatever should stand in for it) for the given id.
  def recover_from_dump(node_by_id)
    @children = @children.map { |label, id| [label, node_by_id.call(id)] }
    @parents = @parents.map { |label, id| [label, node_by_id.call(id)] }
  end

  ###
  # ID-related things

  # Two GraphNodes are equal iff their ids are equal;
  # anything that is not a GraphNode is never equal to a node.
  def ==(other_node)
    return false unless other_node.kind_of? GraphNode

    @id == other_node.id
  end

  def id
    return @id
  end

  # change the id of this node
  def chid(newid)
    @id = newid
  end

  ###
  # setting and retrieving features

  # returns the value of the feature, nil if it is not set
  def get_f(feature)
    return @features[feature]
  end

  # set the feature, overwriting any previous value
  def set_f(feature, value)
    @features[feature] = value
  end

  # set the feature; raises a RuntimeError if it already has a non-nil value
  def add_f(feature, value)
    unless @features[feature].nil?
      # interpolation: feature names need not be Strings
      raise "Feature #{feature} already set."
    end
    set_f(feature, value)
  end

  ###
  # ancestors

  # returns: array of parent nodes
  def parents
    return @parents.map { |_label, parent| parent }
  end

  # returns: array of the labels of all incoming edges
  def parent_labels
    return @parents.map { |label, _parent| label }
  end

  # returns: label of the first edge coming from the given parent,
  # nil if there is none
  def parent_label(parent)
    @parents.each { |label, node|
      return label if node == parent
    }
    return nil
  end

  # returns: the internal list of [edgelabel, parent] pairs (not a copy)
  def parents_with_edgelabel
    return @parents
  end

  # yields each parent node
  def each_parent
    @parents.each { |_label, parent| yield parent }
  end

  # yields each [edgelabel, parent] pair
  def each_parent_with_edgelabel
    @parents.each { |label_parent| yield label_parent }
  end

  # returns: array of the parents reached via an edge whose label
  # is included in labels
  def parents_by_edgelabels(labels)
    return @parents.select { |label, _parent|
      labels.include? label
    }.map { |_label, parent|
      parent
    }
  end

  # Add an incoming edge from parent with the given label.
  # Unless varhash["pointer_insteadof_edge"] is set, self is also
  # registered as a child of parent (if it is not registered already).
  def add_parent(parent, edgelabel, varhash={})
    @parents << [edgelabel, parent]

    # and vice versa: add self as child to parent
    unless varhash["pointer_insteadof_edge"]
      unless parent.children_with_edgelabel.include? [edgelabel, self]
        parent.add_child(self, edgelabel)
      end
    end
  end

  # Remove all incoming edges from parent with the given label.
  # Unless varhash["pointer_insteadof_edge"] is set, self is also
  # removed from the children of parent.
  def remove_parent(parent, edgelabel, varhash={})
    @parents = @parents.reject { |label, node|
      label == edgelabel and node == parent
    }

    # and vice versa: remove self as child from parent
    unless varhash["pointer_insteadof_edge"]
      if parent.children_with_edgelabel.include? [edgelabel, self]
        parent.remove_child(self, edgelabel)
      end
    end
  end

  # returns: number of incoming edges
  def indeg
    return @parents.length
  end

  # returns: array of all nodes reachable via incoming edges, no duplicates
  def ancestors
    return ancestors_noduplicates([], [])
  end

  # like ancestors, but only follows edges whose label is in labels
  # (an empty list of labels means: follow all edges)
  def ancestors_by_edgelabels(labels)
    return ancestors_noduplicates([], labels)
  end

  ###
  # descendants

  # returns: array of child nodes
  def children
    return @children.map { |_label, child| child }
  end

  # returns: array of the labels of all outgoing edges
  def child_labels
    return @children.map { |label, _child| label }
  end

  # returns: label of the first edge leading to the given child,
  # nil if there is none
  def child_label(child)
    @children.each { |label, node|
      return label if node == child
    }
    return nil
  end

  # returns: the internal list of [edgelabel, child] pairs (not a copy)
  def children_with_edgelabel
    return @children
  end

  # yields each child node
  def each_child
    @children.each { |_label, child| yield child }
  end

  # yields each [edgelabel, child] pair
  def each_child_with_edgelabel
    @children.each { |label_child| yield label_child }
  end

  # returns: array of the children reached via an edge whose label
  # is included in labels
  def children_by_edgelabels(labels)
    return @children.select { |label, _child|
      labels.include? label
    }.map { |_label, child|
      child
    }
  end

  # Add an outgoing edge to child with the given label.
  # Unless varhash["pointer_insteadof_edge"] is set, self is also
  # registered as a parent of child (if it is not registered already).
  def add_child(child, edgelabel, varhash={})
    @children << [edgelabel, child]

    # and vice versa: add self as parent to child
    unless varhash["pointer_insteadof_edge"]
      unless child.parents_with_edgelabel.include? [edgelabel, self]
        child.add_parent(self, edgelabel)
      end
    end
  end

  # Remove all outgoing edges to child with the given label.
  # Unless varhash["pointer_insteadof_edge"] is set, self is also
  # removed from the parents of child.
  def remove_child(child, edgelabel, varhash={})
    @children = @children.reject { |label, node|
      label == edgelabel and node == child
    }

    # and vice versa: remove self as parent from child
    unless varhash["pointer_insteadof_edge"]
      if child.parents_with_edgelabel.include? [edgelabel, self]
        child.remove_parent(self, edgelabel)
      end
    end
  end

  # Relabel the edge to child from oldlabel to newlabel.
  # Does nothing if there is no such edge.
  def change_child_label(child, oldlabel, newlabel, varhash={})
    if @children.include? [oldlabel, child]
      remove_child(child, oldlabel, varhash)
      add_child(child, newlabel, varhash)
    end
  end

  # Remove every outgoing edge (see remove_child for the role of varhash).
  def remove_all_children(varhash={})
    # iterate over a snapshot: remove_child replaces @children on each call
    @children.dup.each { |label, child|
      remove_child(child, label, varhash)
    }
  end

  #### CAUTION: set_children must be called with an "internal format" list of children:
  #### instead of using [node, edgelabel], use [edgelabel, node]
  #
  # Removes all current children, then installs the given list as the new
  # child list. The list object itself is stored (not a copy), and self is
  # NOT added as a parent to the new children.
  def set_children(list, varhash={})
    remove_all_children(varhash)

    @children = list
  end

  # returns: number of outgoing edges
  def outdeg
    return @children.length
  end

  # returns: array of the nodes without outgoing edges that are reachable
  # from this node via outgoing edges (the node itself if it has none)
  def yield_nodes
    arr = []
    if outdeg == 0
      arr << self
    end
    each_child { |c|
      if c.outdeg == 0
        arr << c
      else
        arr.concat c.yield_nodes
      end
    }
    return arr
  end

  # returns: array of all nodes reachable via outgoing edges, no duplicates
  def descendants
    return descendants_noduplicates([], [])
  end

  # like descendants, but only follows edges whose label is in labels
  # (an empty list of labels means: follow all edges)
  def descendants_by_edgelabels(labels)
    return descendants_noduplicates([], labels)
  end

  protected

  # Collect into nodes all nodes reachable via outgoing edges.
  # nodes doubles as the list of nodes seen so far, so that shared
  # descendants are listed (and visited) only once.
  def descendants_noduplicates(nodes, labels)
    @children.each { |label, child|
      next unless labels.empty? or labels.include? label
      next if nodes.include? child

      nodes << child
      nodes = child.descendants_noduplicates(nodes, labels)
    }
    return nodes
  end

  # Collect into nodes all nodes reachable via incoming edges.
  # nodes doubles as the list of nodes seen so far, so that shared
  # ancestors are listed (and visited) only once.
  def ancestors_noduplicates(nodes, labels)
    @parents.each { |label, parent|
      next unless labels.empty? or labels.include? label
      next if nodes.include? parent

      nodes << parent
      nodes = parent.ancestors_noduplicates(nodes, labels)
    }
    return nodes
  end

  #### CAUTION: set_parents must be called with an "internal format" list of parents:
  #### instead of using [node, edgelabel], use [edgelabel, node]
  #
  # Removes all current parents, then adds every [edgelabel, parent] pair
  # of the list via add_parent (so the other end of each edge is updated
  # too, unless varhash["pointer_insteadof_edge"] is set).
  def set_parents(list, varhash={})
    # iterate over a snapshot: remove_parent replaces @parents on each call
    @parents.dup.each { |label, parent|
      remove_parent(parent, label, varhash)
    }

    list.each { |label, parent|
      # add_parent expects the node first and the label second
      add_parent(parent, label, varhash)
    }
  end

  private

  # Serialize a list of [label, node] edges as "label<PAIR_SEP>id" items
  # joined by EDGE_SEP. Labels and ids are converted to Strings.
  def dump_edges(edges)
    edges.map { |label, node| "#{label}#{PAIR_SEP}#{node.id}" }.join(EDGE_SEP)
  end

  # Inverse of dump_edges: returns a list of [label, id] pairs,
  # the empty list for a missing or empty field.
  def load_edges(field)
    return [] if field.nil? or field.empty?

    field.split(EDGE_SEP).map { |pair| pair.split(PAIR_SEP) }
  end
end
@@ -0,0 +1,24 @@
1
# KE changed July 05: now no inclusion of modules required,
# and names changed from REXML.Encoding to UtfIso

# Conversion between UTF-8 and ISO-8859-1 strings, done code point by
# code point with String#unpack / Array#pack.
module UtfIso
  # Convert from UTF-8.
  #
  # content: String that unpack('U*') can decode as UTF-8
  # returns: String with one byte per code point of content.
  #          Code points above 0xFF have no single-byte representation here
  #          and are silently dropped. (An earlier version, shared by
  #          Stefan Scholl, had a disabled option to emit them as numeric
  #          entities of the form &#nnnn; instead.)
  def self.to_iso_8859_1(content)
    content.unpack('U*').select { |codepoint| codepoint <= 0xFF }.pack('C*')
  end

  # Convert to UTF-8.
  #
  # str: String whose bytes are read one by one as code points
  # returns: String in which every byte of str is encoded as UTF-8
  def self.from_iso_8859_1(str)
    str.unpack('C*').pack('U*')
  end
end
@@ -0,0 +1,186 @@
1
+ # sp 24 08 04
2
+
3
+ # this file provides a very simple wrapper for using different ML systems
4
+ # all you need to do is to write the appropriate learner class
5
+ # and insert them in the initialize routine here in ML()
6
+ #
7
+ # available at the moment:
8
+ # * timbl (memory-based learner)
9
+ # * mallet-maxent (another maxent system)
10
+ # * maxent (the OpenNLP maxent system)
11
+
12
+ # part of contract: learner is not initialised unless it is either trained or read
13
+
14
+ require "common/Optimise"
15
+
16
# Classifier: uniform front end for the supported ML systems.
#
# The actual work is delegated to a learner object (see @@learners) that
# has to respond to train, read, write, exists?, apply and read_resultfile.
# Optionally the data is passed through an Optimise object first.
#
# Part of the contract: a classifier is not ready for apply() until it has
# either been trained or read.
#
# All methods taking file names clean them up IN PLACE (the String passed
# by the caller is modified): "<" and ">" are removed, blanks become "_".
class Classifier

  # Known learners: [name as given by the caller,
  #                  file name below common/,
  #                  name of the learner class]
  @@learners = [
    ["timbl", "Timbl", "Timbl"],
    # ["mallet", "Mallet", "Mallet"],
    ["maxent", "Maxent", "Maxent"]
  ]

  # learner: name of the learner, one of the names listed in @@learners
  # params:  array, consumed from the front (the array is modified):
  #          - optionally the string "optimise" to switch on optimisation
  #          - the path of the classifier program
  #          - anything else is handed on to the learner class
  #
  # Exits the program with status 1 if the program path does not exist
  # or if the learner is unknown.
  def initialize(learner, params)
    @ready = false

    if params[0] == "optimise"
      params.shift
      @optimise = true
    else
      @optimise = false
    end

    program_path = ""
    raw_path = params.shift
    if raw_path.respond_to?(:chomp)
      program_path = raw_path.chomp
      unless FileTest.exist? program_path
        $stderr.puts "Error: Could not find classifier system at " + program_path
        $stderr.puts "Perhaps an erroneous entry in your experiment file?"
        exit 1
      end
    else
      # a missing path is reported, but (as before) not fatal at this point
      $stderr.puts "Error: No program path provided for classifier system."
    end

    # try to find our learner in the pre-set list of learners
    learner_tuple = @@learners.assoc(learner)
    unless learner_tuple
      $stderr.puts "Error: I don't know the learner " + learner.to_s
      $stderr.puts "Perhaps an erroneous entry in your experiment file?"
      exit 1
    end

    _learner_name, learner_filename, learner_classname = learner_tuple
    # deliberately lazy: only load the learner that is actually used
    require "common/#{learner_filename}"
    @learner = Classifier.const_get(learner_classname).new(program_path, params)
  end

  # A classifier can (and has to be) either trained or read.
  #
  # trainfile:       name of the file with the training data
  # classifier_file: handed on to the learner; makes it possible to directly
  #                  specify a file name for storing the classifier
  #
  # With optimisation switched on, the learner is trained on a temporary
  # file <trainfile>.opted, which is removed afterwards.
  def train(trainfile, classifier_file=nil)
    # make sure we produce a valid file name
    sanitize_filename!(trainfile)

    if @optimise
      STDERR.puts "[ML] using feature optimisation"
      @optimiser = Optimise.new
      @optimiser.init_from_data(trainfile)
      optimisedfile = trainfile + ".opted"
      begin
        @optimiser.apply(trainfile, optimisedfile)
        @learner.train(optimisedfile, classifier_file)
      ensure
        # remove the temporary file even if training fails
        File.delete(optimisedfile) if FileTest.exist?(optimisedfile)
      end
    else
      STDERR.puts "[ML] no feature optimisation"
      @learner.train(trainfile, classifier_file)
    end
    @ready = true
  end

  # Read a stored classifier (and, with optimisation switched on,
  # the stored optimisation that belongs to it).
  #
  # returns true iff reading the classifier from the file has had success
  def read(classifier_file)
    # make sure we produce a valid file name
    sanitize_filename!(classifier_file)

    # read file, if present
    status = @learner.read(classifier_file)

    # if reading has failed, return the learner's (false or nil) status
    unless status
      STDERR.puts "reading from #{classifier_file} did not succeed"
      return status
    end

    # read optimisation, if desired
    if @optimise
      optimisations_filename = Optimise.recommended_filename(classifier_file)
      unless FileTest.exist? optimisations_filename
        STDERR.puts "[ML] Error: attempted to read stored optimisation, but file does not exist"
        return false
      end
      @optimiser = Optimise.new
      @optimiser.init_from_file(optimisations_filename)
    end

    @ready = true
    return true
  end

  # A classifier can be stored somewhere. This can be more than one file
  # (classifier-specific), but all files start with "classifier_file".
  # With optimisation switched on, the optimisation is stored as well.
  def write(classifier_file)
    # make sure we produce a valid file name
    sanitize_filename!(classifier_file)
    @learner.write(classifier_file)
    if @optimise
      @optimiser.store(Optimise.recommended_filename(classifier_file))
    end
  end

  ###
  # exists?
  # check if a classifier is living at some particular path
  # (the answer comes from the learner)
  def exists?(classifier_file)
    sanitize_filename!(classifier_file)
    return @learner.exists?(classifier_file)
  end

  # A classifier can be applied: test either on the training or the test
  # data in testfile, results go to outfile.
  #
  # returns false if the classifier has not been trained or read, or if
  # testfile does not exist; otherwise returns what the learner's apply
  # returns (expected to be true iff application has had success).
  #
  # With optimisation switched on, the learner is applied to a temporary
  # file <testfile>.opted, which is removed afterwards.
  def apply(testfile, outfile)
    # make sure we produce valid file names
    sanitize_filename!(testfile)
    sanitize_filename!(outfile)

    unless @ready
      STDERR.puts "[ML] Warning: learner not ready for testing! Must be trained or read."
      return false
    end

    # do we have a testfile?
    unless FileTest.exist?(testfile)
      STDERR.puts "[ML] Warning: could not find testfile (maybe empty test set?)."
      return false
    end

    return @learner.apply(testfile, outfile) unless @optimise

    optimisedfile = testfile + ".opted"
    begin
      @optimiser.apply(testfile, optimisedfile)
      @learner.apply(optimisedfile, outfile)
    ensure
      # clean up the temporary file whatever the outcome
      File.delete(optimisedfile) if FileTest.exist?(optimisedfile)
    end
  end

  ###
  # read classifier result file,
  # returns a list of instance_results
  # where an instance_result is a list of pairs [label, confidence]
  # where the pairs are sorted by confidence
  def read_resultfile(file)
    return @learner.read_resultfile(file)
  end

  private

  # Make sure we produce a valid file name: remove "<" and ">",
  # replace blanks by "_". Modifies the given String in place,
  # so callers see the cleaned-up name, too.
  def sanitize_filename!(filename)
    filename.gsub!(/[<>]/, "")
    filename.gsub!(/ /, "_")
    filename
  end
end