frprep 0.0.1.prealpha

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (138) hide show
  1. data/.yardopts +8 -0
  2. data/CHANGELOG.rdoc +0 -0
  3. data/LICENSE.rdoc +0 -0
  4. data/README.rdoc +0 -0
  5. data/lib/common/AbstractSynInterface.rb +1227 -0
  6. data/lib/common/BerkeleyInterface.rb +375 -0
  7. data/lib/common/CollinsInterface.rb +1165 -0
  8. data/lib/common/ConfigData.rb +694 -0
  9. data/lib/common/Counter.rb +18 -0
  10. data/lib/common/DBInterface.rb +48 -0
  11. data/lib/common/EnduserMode.rb +27 -0
  12. data/lib/common/Eval.rb +480 -0
  13. data/lib/common/FixSynSemMapping.rb +196 -0
  14. data/lib/common/FrPrepConfigData.rb +66 -0
  15. data/lib/common/FrprepHelper.rb +1324 -0
  16. data/lib/common/Graph.rb +345 -0
  17. data/lib/common/ISO-8859-1.rb +24 -0
  18. data/lib/common/ML.rb +186 -0
  19. data/lib/common/Maxent.rb +215 -0
  20. data/lib/common/MiniparInterface.rb +1388 -0
  21. data/lib/common/Optimise.rb +195 -0
  22. data/lib/common/Parser.rb +213 -0
  23. data/lib/common/RegXML.rb +269 -0
  24. data/lib/common/RosyConventions.rb +171 -0
  25. data/lib/common/SQLQuery.rb +243 -0
  26. data/lib/common/STXmlTerminalOrder.rb +194 -0
  27. data/lib/common/SalsaTigerRegXML.rb +2347 -0
  28. data/lib/common/SalsaTigerXMLHelper.rb +99 -0
  29. data/lib/common/SleepyInterface.rb +384 -0
  30. data/lib/common/SynInterfaces.rb +275 -0
  31. data/lib/common/TabFormat.rb +720 -0
  32. data/lib/common/Tiger.rb +1448 -0
  33. data/lib/common/TntInterface.rb +44 -0
  34. data/lib/common/Tree.rb +61 -0
  35. data/lib/common/TreetaggerInterface.rb +303 -0
  36. data/lib/common/headz.rb +338 -0
  37. data/lib/common/option_parser.rb +13 -0
  38. data/lib/common/ruby_class_extensions.rb +310 -0
  39. data/lib/fred/Baseline.rb +150 -0
  40. data/lib/fred/FileZipped.rb +31 -0
  41. data/lib/fred/FredBOWContext.rb +863 -0
  42. data/lib/fred/FredConfigData.rb +182 -0
  43. data/lib/fred/FredConventions.rb +232 -0
  44. data/lib/fred/FredDetermineTargets.rb +324 -0
  45. data/lib/fred/FredEval.rb +312 -0
  46. data/lib/fred/FredFeatureExtractors.rb +321 -0
  47. data/lib/fred/FredFeatures.rb +1061 -0
  48. data/lib/fred/FredFeaturize.rb +596 -0
  49. data/lib/fred/FredNumTrainingSenses.rb +27 -0
  50. data/lib/fred/FredParameters.rb +402 -0
  51. data/lib/fred/FredSplit.rb +84 -0
  52. data/lib/fred/FredSplitPkg.rb +180 -0
  53. data/lib/fred/FredTest.rb +607 -0
  54. data/lib/fred/FredTrain.rb +144 -0
  55. data/lib/fred/PlotAndREval.rb +480 -0
  56. data/lib/fred/fred.rb +45 -0
  57. data/lib/fred/md5.rb +23 -0
  58. data/lib/fred/opt_parser.rb +250 -0
  59. data/lib/frprep/AbstractSynInterface.rb +1227 -0
  60. data/lib/frprep/Ampersand.rb +37 -0
  61. data/lib/frprep/BerkeleyInterface.rb +375 -0
  62. data/lib/frprep/CollinsInterface.rb +1165 -0
  63. data/lib/frprep/ConfigData.rb +694 -0
  64. data/lib/frprep/Counter.rb +18 -0
  65. data/lib/frprep/FNCorpusXML.rb +643 -0
  66. data/lib/frprep/FNDatabase.rb +144 -0
  67. data/lib/frprep/FixSynSemMapping.rb +196 -0
  68. data/lib/frprep/FrPrepConfigData.rb +66 -0
  69. data/lib/frprep/FrameXML.rb +513 -0
  70. data/lib/frprep/FrprepHelper.rb +1324 -0
  71. data/lib/frprep/Graph.rb +345 -0
  72. data/lib/frprep/ISO-8859-1.rb +24 -0
  73. data/lib/frprep/MiniparInterface.rb +1388 -0
  74. data/lib/frprep/Parser.rb +213 -0
  75. data/lib/frprep/RegXML.rb +269 -0
  76. data/lib/frprep/STXmlTerminalOrder.rb +194 -0
  77. data/lib/frprep/SalsaTigerRegXML.rb +2347 -0
  78. data/lib/frprep/SalsaTigerXMLHelper.rb +99 -0
  79. data/lib/frprep/SleepyInterface.rb +384 -0
  80. data/lib/frprep/SynInterfaces.rb +275 -0
  81. data/lib/frprep/TabFormat.rb +720 -0
  82. data/lib/frprep/Tiger.rb +1448 -0
  83. data/lib/frprep/TntInterface.rb +44 -0
  84. data/lib/frprep/Tree.rb +61 -0
  85. data/lib/frprep/TreetaggerInterface.rb +303 -0
  86. data/lib/frprep/do_parses.rb +142 -0
  87. data/lib/frprep/frprep.rb +686 -0
  88. data/lib/frprep/headz.rb +338 -0
  89. data/lib/frprep/one_parsed_file.rb +28 -0
  90. data/lib/frprep/opt_parser.rb +94 -0
  91. data/lib/frprep/ruby_class_extensions.rb +310 -0
  92. data/lib/rosy/AbstractFeatureAndExternal.rb +240 -0
  93. data/lib/rosy/DBMySQL.rb +146 -0
  94. data/lib/rosy/DBSQLite.rb +280 -0
  95. data/lib/rosy/DBTable.rb +239 -0
  96. data/lib/rosy/DBWrapper.rb +176 -0
  97. data/lib/rosy/ExternalConfigData.rb +58 -0
  98. data/lib/rosy/FailedParses.rb +130 -0
  99. data/lib/rosy/FeatureInfo.rb +242 -0
  100. data/lib/rosy/GfInduce.rb +1115 -0
  101. data/lib/rosy/GfInduceFeature.rb +148 -0
  102. data/lib/rosy/InputData.rb +294 -0
  103. data/lib/rosy/RosyConfigData.rb +115 -0
  104. data/lib/rosy/RosyConfusability.rb +338 -0
  105. data/lib/rosy/RosyEval.rb +465 -0
  106. data/lib/rosy/RosyFeatureExtractors.rb +1609 -0
  107. data/lib/rosy/RosyFeaturize.rb +280 -0
  108. data/lib/rosy/RosyInspect.rb +336 -0
  109. data/lib/rosy/RosyIterator.rb +477 -0
  110. data/lib/rosy/RosyPhase2FeatureExtractors.rb +230 -0
  111. data/lib/rosy/RosyPruning.rb +165 -0
  112. data/lib/rosy/RosyServices.rb +744 -0
  113. data/lib/rosy/RosySplit.rb +232 -0
  114. data/lib/rosy/RosyTask.rb +19 -0
  115. data/lib/rosy/RosyTest.rb +826 -0
  116. data/lib/rosy/RosyTrain.rb +232 -0
  117. data/lib/rosy/RosyTrainingTestTable.rb +786 -0
  118. data/lib/rosy/TargetsMostFrequentFrame.rb +60 -0
  119. data/lib/rosy/View.rb +418 -0
  120. data/lib/rosy/opt_parser.rb +379 -0
  121. data/lib/rosy/rosy.rb +77 -0
  122. data/lib/shalmaneser/version.rb +3 -0
  123. data/test/frprep/test_opt_parser.rb +94 -0
  124. data/test/functional/functional_test_helper.rb +40 -0
  125. data/test/functional/sample_experiment_files/fred_test.salsa.erb +122 -0
  126. data/test/functional/sample_experiment_files/fred_train.salsa.erb +135 -0
  127. data/test/functional/sample_experiment_files/prp_test.salsa.erb +138 -0
  128. data/test/functional/sample_experiment_files/prp_test.salsa.fred.standalone.erb +120 -0
  129. data/test/functional/sample_experiment_files/prp_test.salsa.rosy.standalone.erb +120 -0
  130. data/test/functional/sample_experiment_files/prp_train.salsa.erb +138 -0
  131. data/test/functional/sample_experiment_files/prp_train.salsa.fred.standalone.erb +138 -0
  132. data/test/functional/sample_experiment_files/prp_train.salsa.rosy.standalone.erb +138 -0
  133. data/test/functional/sample_experiment_files/rosy_test.salsa.erb +257 -0
  134. data/test/functional/sample_experiment_files/rosy_train.salsa.erb +259 -0
  135. data/test/functional/test_fred.rb +47 -0
  136. data/test/functional/test_frprep.rb +52 -0
  137. data/test/functional/test_rosy.rb +20 -0
  138. metadata +270 -0
@@ -0,0 +1,18 @@
1
+ # Counter class - provides unique ids with state
2
+
3
# Counter: a stateful source of consecutive ids.
#
# The counter is seeded with an initial value; each call to #next hands
# out the current value and then advances the internal state by one.
class Counter

  # init_value: the first value that #next will hand out
  def initialize(init_value)
    @v = init_value
  end

  # Peek at the current value without advancing the counter.
  def get
    @v
  end

  # Hand out the current value, then advance the counter by one
  # (post-increment semantics).
  def next
    current = @v
    @v = current + 1
    current
  end

end
@@ -0,0 +1,48 @@
1
+ # DBInterface
2
+ #
3
+ # Okay, things are getting somewhat complicated here with all
4
+ # the DB classes, but this is how it all fits together:
5
+ #
6
+ # - DBWrapper: abstract class describing the DB interface
7
+ # - DBMySQL, DBSQLite: subclasses of DBWrapper, for MySQL
8
+ # and SQLite, respectively
9
+ # - DBInterface: class to be used from outside,
10
+ # decides (based on the experiment file) whether to use
11
+ # MySQL or SQLite and makes an object of the right kind,
12
+ # 'require'-ing either DBMySQL or DBSQLite, but not both,
13
+ # because the right ruby packages might not be installed
14
+ # for both SQL systems
15
+
16
# get_db_interface
#
# Build the database access object selected by the 'dbtype' entry of the
# experiment file. Only the backend that is actually requested is
# 'require'd, so the Ruby package for the other SQL system need not be
# installed.
#
# exp:        experiment file object; exp.get("dbtype") must return
#             "mysql" or "sqlite"
# dir:        string: Shalmaneser directory (passed on to DBSQLite only)
# identifier: string: identifier of the data (passed on to DBSQLite only)
#
# returns: a DBMySQL or DBSQLite object
#
# If the backend cannot be loaded, or 'dbtype' has any other value, an
# error message is written to $stderr and the program exits with status 1.
def get_db_interface(exp,               # experiment file object with 'dbtype' entry
                     dir = nil,         # string: Shalmaneser directory (used by SQLite only)
                     identifier = nil)  # string: identifier of the data (SQLite)

  case exp.get("dbtype")
  when "mysql"
    begin
      require 'rosy/DBMySQL'
    rescue LoadError, StandardError
      # A missing package raises LoadError, which is a ScriptError and is
      # NOT caught by a bare 'rescue' -- it has to be named explicitly.
      $stderr.puts "Error loading DB interface."
      $stderr.puts "Make sure you have the Ruby MySQL package installed."
      exit 1
    end
    return DBMySQL.new(exp)

  when "sqlite"
    begin
      require 'rosy/DBSQLite'
    rescue LoadError, StandardError
      # see above: LoadError must be rescued explicitly
      $stderr.puts "Error loading DB interface."
      $stderr.puts "Make sure you have the Ruby SQLite package installed."
      exit 1
    end
    return DBSQLite.new(exp, dir, identifier)

  else
    $stderr.puts "Error: database type needs to be either 'mysql' or 'sqlite'."
    $stderr.puts "Please set parameter 'dbtype' in the experiment file accordingly."
    exit 1
  end
end
47
+
48
+
@@ -0,0 +1,27 @@
1
+ ###
2
+ # Enduser mode:
3
+ # no training, use only precompiled classifiers,
4
+ # and remove DB table with test data after applying classifiers
5
+ #
6
+ # The global variable for this, $ENDUSER_MODE, is expected to
7
+ # be set in the main program, e.g. due to some setting in the
8
+ # experiment file.
9
+
10
+ ##
11
+ # if in enduser mode, the given condition must be true,
12
+ # otherwise end execution
13
class Object

  # Guard for services that are restricted in enduser mode.
  #
  # Does nothing unless the global $ENDUSER_MODE is set. If it is set and
  # the given condition does not hold, a message is written to $stderr
  # and execution ends (exit status 0).
  #
  # condition: must be true for execution to continue in enduser mode
  def in_enduser_mode_ensure(condition)
    return nil unless $ENDUSER_MODE
    return nil if condition

    $stderr.puts "Sorry, this service is unavailable in enduser mode."
    exit 0
  end

  # Guard for services that are never available in enduser mode:
  # ends execution whenever $ENDUSER_MODE is set.
  def in_enduser_mode_unavailable()
    in_enduser_mode_ensure(false)
  end
end
@@ -0,0 +1,480 @@
1
+ # Eval
2
+ # Katrin Erk May 05
3
+ #
4
+ # Evaluate classification results
5
+ # abstract class, has to be instantiated
6
+ # to something that can read in
7
+ # task-specific input data
8
+ #
9
+ # the Eval class provides access methods to all the
10
+ # individual evaluation results and allows for a flag that
11
+ # suppresses evaluation output to a file
12
+
13
+ require "common/ruby_class_extensions"
14
+
15
+ class Eval
16
+
17
+ # prec_group_class, rec_group_class, f_group_class:
18
+ # values for each group/class pair
19
+ # hashes "group class"(string) => score(float)
20
+ attr_reader :prec_group_class, :rec_group_class, :f_group_class
21
+
22
+ # accuracy_group:
23
+ # micro-averaged values for each group
24
+ # hash group(string) => score(float)
25
+ attr_reader :accuracy_group
26
+
27
+ # prec, rec, f, accuracy: float
28
+ # micro-averaged overall values
29
+ attr_reader :prec, :rec, :f, :accuracy
30
+
31
+ ###
32
+ # new
33
+ #
34
+ # outfilename = name of file to print results to.
35
+ # nil: print_evaluation_result() will not do anything
36
+ #
37
+ # logfilename: name of file to print instance-wise results to
38
+ # nil: no logfile output
39
+ #
40
+ # consider_only_one_class:
41
+ # compute and print evaluation for only one of the class labels,
42
+ # the one given as this argument.
43
+ # In this case, overall precision/recall/f-score
44
+ # is available instead of just accuracy, and
45
+ # no group-wise evaluation is done.
46
+ # nil: consider all classes.
47
+ def initialize(outfilename = nil,
48
+ logfilename = nil, # string:
49
+ consider_only_one_class = nil) # string/nil: evaluate only one class?
50
+
51
+ # print logfile containing
52
+ # results for every single instance?
53
+ if logfilename
54
+ @print_log = true
55
+ @logfilename = logfilename
56
+ else
57
+ @print_log = false
58
+ end
59
+ @outfilename = outfilename
60
+ @consider_only_one_class = consider_only_one_class
61
+
62
+ ###
63
+ # initialize object data:
64
+ #
65
+ # num_assigned, num_truepos, num_gold:
66
+ # hashes: [group class] (string*string) => value(integer): number of times that...
67
+ # num_assigned: ...this "group class" pair has been
68
+ # assigned by the classifier
69
+ # num_gold: ... this "group class" pair has been
70
+ # annotated in the gold standard
71
+ # num_truepos:...this "group class" pair has been
72
+ # assigned correctly by the classifier
73
+ @num_assigned = Hash.new(0)
74
+ @num_truepos = Hash.new(0)
75
+ @num_gold = Hash.new(0)
76
+
77
+ # num_instances:
78
+ # hash: group(string) -> value(integer): number of instances
79
+ # for the given group
80
+ @num_instances = Hash.new(0)
81
+
82
+ # precision, recall, f-score:
83
+ # for the format of these, see above
84
+ @prec_group_class = Hash.new(0.0)
85
+ @rec_group_class = Hash.new(0.0)
86
+ @f_group_class = Hash.new(0.0)
87
+
88
+ @accuracy_group = Hash.new(0.0)
89
+
90
+ @prec = @rec = @f = @accuracy = 0.0
91
+ end
92
+
93
+ ###
94
+ # compute
95
+ #
96
+ # do the evaluation
97
+ def compute(printme = true) # boolean: print evaluation results to file?
98
+
99
+ start_printlog()
100
+
101
+ # hash: group => value(integer): number of true positives for a group
102
+ num_truepos_group = Hash.new
103
+ # integers: overall assigned/gold/truepos/instances
104
+ num_assigned_all = 0
105
+ num_gold_all = 0
106
+ num_truepos_all = 0
107
+ num_instances_all = 0
108
+
109
+ ###
110
+ # iterate through all training/test file pairs,
111
+ # record correct/incorrect assignments
112
+ each_group { |group|
113
+
114
+ # read gold file and classifier output file in parallel
115
+ each_instance(group) { |goldclass, assigned_class|
116
+
117
+ # make sure that there are no spaces in the group name:
118
+ # later on we assume that by doing "group class".split()
119
+ # we can recover the group and the class, which won't work
120
+ # in case the group name contains spaces
121
+ mygroup = group.gsub(/ /, "_")
122
+
123
+ print_log(mygroup + " gold: " + goldclass.to_s + " " + "assigned: " + assigned_class.to_s)
124
+
125
+ # record instance
126
+ @num_instances[mygroup] += 1
127
+
128
+ # record gold standard class
129
+ if goldclass and not(goldclass.empty?) and goldclass != "-"
130
+ @num_gold[[mygroup, goldclass]] += 1
131
+ end
132
+
133
+ # record assigned classes (if present)
134
+ if assigned_class and not(assigned_class.empty?) and assigned_class != "-"
135
+ # some class has been assigned:
136
+ # record it
137
+ @num_assigned[[mygroup, assigned_class]] += 1
138
+ end
139
+
140
+ # is the assigned class included in the list of gold standard classes?
141
+ # then count this as a match
142
+ if goldclass == assigned_class
143
+ # gold file class matches assigned class
144
+ @num_truepos[[mygroup, assigned_class]] += 1
145
+
146
+ print_log(" => correct\n")
147
+
148
+ elsif assigned_class.nil? or assigned_class.empty? or assigned_class == "-"
149
+ print_log(" => unassigned\n")
150
+
151
+ else
152
+ print_log(" => incorrect\n")
153
+ end
154
+ } # each instance for this group
155
+ } # all groups
156
+
157
+
158
+ ####
159
+ # compute precision, recall, f-score
160
+
161
+ # map each group to its classes.
162
+ # groups: array of strings
163
+ # group_classes: hash group(string) -> array of classes(strings)
164
+ # if @consider_only_one_class has been set, only that class will be listed
165
+ groups = @num_gold.keys.map { |group, tclass| group }.uniq.sort
166
+ group_classes = Hash.new
167
+
168
+ # for all group/class pairs occurring either in the gold file or
169
+ # the classifier output file: record it in the group_classes hash
170
+ (@num_gold.keys.concat @num_assigned.keys).each { |group, tclass|
171
+ if group_classes[group].nil?
172
+ group_classes[group] = Array.new
173
+ end
174
+ if @consider_only_one_class and
175
+ tclass != @consider_only_one_class
176
+ # we are computing results for only one target class,
177
+ # and this is not it
178
+ next
179
+ end
180
+ if tclass
181
+ group_classes[group] << tclass
182
+ end
183
+ }
184
+ group_classes.each_key { |group|
185
+ group_classes[group] = group_classes[group].uniq.sort
186
+ }
187
+
188
+
189
+ # precision, recall, f for each group/class pair
190
+ groups.each { |group|
191
+ if group_classes[group].nil?
192
+ next
193
+ end
194
+
195
+ # iterate through all classes of the group
196
+ group_classes[group].each { |tclass|
197
+
198
+ key = [group, tclass]
199
+
200
+ # compute precision, recall, f-score
201
+ @prec_group_class[key], @rec_group_class[key], @f_group_class[key] =
202
+ prec_rec_f(@num_assigned[key], @num_gold[key], @num_truepos[key])
203
+ }
204
+ }
205
+
206
+
207
+ # micro-averaged accuracy for each group
208
+ if @consider_only_one_class
209
+ # we are computing results for only one target class,
210
+ # so precision/recall/f-score group-wise would be
211
+ # exactly the same as group+class-wise.
212
+ else
213
+ groups.each { |group|
214
+ # sum true positives over all target classes of the group
215
+ num_truepos_group[group] = @num_truepos.keys.big_sum(0) { |othergroup, tclass|
216
+ if othergroup == group
217
+ @num_truepos[[othergroup, tclass]]
218
+ else
219
+ 0
220
+ end
221
+ }
222
+
223
+ @accuracy_group[group] = accuracy(num_truepos_group[group], @num_instances[group])
224
+ }
225
+ end
226
+
227
+
228
+ # overall values:
229
+ if @consider_only_one_class
230
+ # we are computing results for only one target class,
231
+ # so overall precision/recall/f-score (micro-average) make sense
232
+
233
+ # compute precision, recall, f-score, micro-averaged
234
+ # but only include the target class we are interested in
235
+ num_assigned_all, num_gold_all, num_truepos_all = [@num_assigned, @num_gold, @num_truepos].map { |hash|
236
+ hash.keys.big_sum(0) { |group, tclass|
237
+ if tclass == @consider_only_one_class
238
+ hash[[group, tclass]]
239
+ else
240
+ 0
241
+ end
242
+ }
243
+ }
244
+
245
+ @prec, @rec, @f = prec_rec_f(num_assigned_all, num_gold_all, num_truepos_all)
246
+
247
+ # stderr output of global results
248
+ $stderr.print "Overall result: prec: ", sprintf("%.4f", @prec)
249
+ $stderr.print " rec: ", sprintf("%.4f", @rec)
250
+ $stderr.print " f: ", sprintf("%.4f", @f), "\n"
251
+
252
+ else
253
+ # we are computing results for all classes,
254
+ # so use accuracy instead of precision/recall/f-score
255
+ num_truepos_all, num_instances_all = [@num_truepos, @num_instances].map { |hash|
256
+ hash.keys.big_sum(0) { |key| hash[key] }
257
+ }
258
+ @accuracy = accuracy(num_truepos_all, num_instances_all)
259
+ # stderr output of global results
260
+ $stderr.print "Overall result: accuracy ", sprintf("%.4f", @accuracy), "\n"
261
+ end
262
+
263
+ ###
264
+ # print precision, recall, f-score to file
265
+ # (optional)
266
+ if printme
267
+ print_evaluation_result(groups, group_classes, num_truepos_group, num_instances_all, num_assigned_all, num_gold_all, num_truepos_all)
268
+ end
269
+
270
+ end_printlog()
271
+ end
272
+
273
+ #####
274
+ protected
275
+
276
+
277
+ ###
278
+ # inject_gold_counts
279
+ #
280
+ # deal with instances that failed preprocessing:
281
+ # add more gold labels that occur in the missing instances
282
+ # these are added to @num_gold
283
+ # so they lower recall.
284
+ def inject_gold_counts(group, tclass, count)
285
+ @num_gold[group + " " + tclass] += count
286
+ end
287
+
288
+ ###
289
+ # print log? if so, start logfile
290
+ def start_printlog()
291
+ if @print_log
292
+ begin
293
+ @logfile = File.new(@logfilename, "w")
294
+ $stderr.puts "Writing evaluation log to " + @logfilename
295
+ rescue
296
+ raise "Couldn't write to eval logfile"
297
+ end
298
+ else
299
+ @logfile = nil
300
+ end
301
+
302
+ end
303
+
304
+ ###
305
+ # print log? if so, end logfile
306
+ def end_printlog()
307
+ if @print_log
308
+ @logfile.close()
309
+ end
310
+ end
311
+
312
+ ###
313
+ # print log? If so, print this string to the logfile
314
+ # (no newline added)
315
+ def print_log(string) # string to be printed
316
+ if @logfile
317
+ @logfile.print string
318
+ end
319
+ end
320
+
321
+ ###
322
+ # each_group
323
+ #
324
+ # yield each group name in turn
325
+ def each_group()
326
+ raise "Abstract, please instantiate"
327
+ end
328
+
329
+ ###
330
+ # each_instance
331
+ #
332
+ # given a group name, yield each instance of this group in turn,
333
+ # or rather: yield pairs [gold_class(string), assigned_class(string)]
334
+ def each_instance(group) # string: group name
335
+ raise "Abstract, please instantiate"
336
+ end
337
+
338
+ ###
339
+ # print_evaluation_result
340
+ #
341
+ # print out all info, sense-specific, lemma-specific and overall,
342
+ # micro- and macro-averaged,
343
+ # to a file
344
+ def print_evaluation_result(groups, # array:string: group names
345
+ group_classes, # hash: group(string) => target classes (array:string)
346
+ num_truepos_group, # hash: group(string) => num true positives(integer)
347
+ num_instances_all, num_assigned_all, num_gold_all, num_truepos_all) # integers
348
+ if @outfilename.nil?
349
+ $stderr.puts "Warning: Can't print evaluation results, got not outfile name."
350
+ return
351
+ end
352
+
353
+ begin
354
+ outfile = File.new(@outfilename, "w")
355
+ rescue
356
+ raise "Couldn't write to eval file " + @outfilename
357
+ end
358
+
359
+
360
+ # print out precision, recall, f-score for each group/class pair
361
+ outfile.puts "-----------------------------"
362
+ outfile.puts "Evaluation per group/target class pair"
363
+ outfile.puts "-----------------------------"
364
+
365
+ # iterate through all groups
366
+ groups.each { |group|
367
+ if group_classes[group].nil?
368
+ next
369
+ end
370
+
371
+ outfile.puts "=============="
372
+ outfile.puts group
373
+
374
+
375
+ # iterate through all classes of the group
376
+ group_classes[group].each { |tclass|
377
+
378
+ key = [group, tclass]
379
+
380
+ outfile.print tclass, "\t", "prec: ", sprintf("%.4f", @prec_group_class[key])
381
+ outfile.print " (", @num_truepos[key], "/", @num_assigned[key], ")"
382
+
383
+ outfile.print "\trec: ", sprintf("%.4f", @rec_group_class[key])
384
+ outfile.print " (", @num_truepos[key], "/", @num_gold[key], ")"
385
+
386
+ outfile.print "\tfscore: ", sprintf("%.4f", @f_group_class[key]), "\n"
387
+ }
388
+ }
389
+
390
+
391
+ # print out evaluation for each group
392
+ unless @consider_only_one_class
393
+ outfile.puts
394
+ outfile.puts "-----------------------------"
395
+ outfile.puts "Evaluation per group"
396
+ outfile.puts "-----------------------------"
397
+
398
+ # iterate through all groups
399
+ groups.each { |group|
400
+
401
+ # micro-averaged accuracy
402
+ outfile.print group, "\t", "accuracy: ", sprintf("%.4f", @accuracy_group[group]),
403
+ " (" , num_truepos_group[group], "/", @num_instances[group], ")\n"
404
+ }
405
+ end
406
+
407
+ # print out overall evaluation
408
+ outfile.puts
409
+ outfile.puts "-----------------------------"
410
+ outfile.puts "Overall evaluation"
411
+ outfile.puts "-----------------------------"
412
+
413
+ if @consider_only_one_class
414
+
415
+ # micro average: precision, recall, f-score
416
+ outfile.print "prec: ", sprintf("%.4f", @prec)
417
+ outfile.print " (", num_truepos_all, "/", num_assigned_all, ")"
418
+
419
+ outfile.print "\trec: ", sprintf("%.4f", @rec)
420
+ outfile.print " (", num_truepos_all, "/", num_gold_all, ")"
421
+
422
+ outfile.print "\tfscore: ", sprintf("%.4f", @f), "\n"
423
+
424
+ else
425
+
426
+ # overall accuracy
427
+ outfile.print "accuracy: ", sprintf("%.4f", @accuracy)
428
+ outfile.print " (", num_truepos_all, "/", num_instances_all, ")\n"
429
+ end
430
+ outfile.flush()
431
+ end
432
+
433
+ ###
434
+ # method prec_rec_f
435
+ # assigned, gold, truepos: counts(integers)
436
+ #
437
+ # compute precision, recall, f-score:
438
+ #
439
+ # precision: true positives / assigned positives
440
+ # recall: true positives / gold positives
441
+ # f-score: 2*precision*recall / (precision + recall)
442
+ #
443
+ # return: precision, recall, f-score as floats
444
+ def prec_rec_f(assigned, gold, truepos)
445
+ # precision
446
+ precision = truepos.to_f / assigned.to_f
447
+ if precision.nan?
448
+ precision = 0.0
449
+ end
450
+
451
+ # recall
452
+ recall = truepos.to_f / gold.to_f
453
+ if recall.nan?
454
+ recall = 0.0
455
+ end
456
+
457
+ # fscore
458
+ fscore = (2 * precision * recall) / (precision + recall)
459
+ if fscore.nan?
460
+ fscore = 0.0
461
+ end
462
+
463
+ return [precision, recall, fscore]
464
+ end
465
+
466
+ ###
467
+ # accuracy:
468
+ #
469
+ # accuracy = true positives / instances
470
+ #
471
+ # returns: accuracy, a float
472
+ def accuracy(truepos, num_inst)
473
+ acc = truepos.to_f / num_inst.to_f
474
+ if acc.nan?
475
+ return 0.0
476
+ else
477
+ return acc
478
+ end
479
+ end
480
+ end