frprep 0.0.1.prealpha

Sign up to get free protection for your applications and to get access to all the features.
Files changed (138) hide show
  1. data/.yardopts +8 -0
  2. data/CHANGELOG.rdoc +0 -0
  3. data/LICENSE.rdoc +0 -0
  4. data/README.rdoc +0 -0
  5. data/lib/common/AbstractSynInterface.rb +1227 -0
  6. data/lib/common/BerkeleyInterface.rb +375 -0
  7. data/lib/common/CollinsInterface.rb +1165 -0
  8. data/lib/common/ConfigData.rb +694 -0
  9. data/lib/common/Counter.rb +18 -0
  10. data/lib/common/DBInterface.rb +48 -0
  11. data/lib/common/EnduserMode.rb +27 -0
  12. data/lib/common/Eval.rb +480 -0
  13. data/lib/common/FixSynSemMapping.rb +196 -0
  14. data/lib/common/FrPrepConfigData.rb +66 -0
  15. data/lib/common/FrprepHelper.rb +1324 -0
  16. data/lib/common/Graph.rb +345 -0
  17. data/lib/common/ISO-8859-1.rb +24 -0
  18. data/lib/common/ML.rb +186 -0
  19. data/lib/common/Maxent.rb +215 -0
  20. data/lib/common/MiniparInterface.rb +1388 -0
  21. data/lib/common/Optimise.rb +195 -0
  22. data/lib/common/Parser.rb +213 -0
  23. data/lib/common/RegXML.rb +269 -0
  24. data/lib/common/RosyConventions.rb +171 -0
  25. data/lib/common/SQLQuery.rb +243 -0
  26. data/lib/common/STXmlTerminalOrder.rb +194 -0
  27. data/lib/common/SalsaTigerRegXML.rb +2347 -0
  28. data/lib/common/SalsaTigerXMLHelper.rb +99 -0
  29. data/lib/common/SleepyInterface.rb +384 -0
  30. data/lib/common/SynInterfaces.rb +275 -0
  31. data/lib/common/TabFormat.rb +720 -0
  32. data/lib/common/Tiger.rb +1448 -0
  33. data/lib/common/TntInterface.rb +44 -0
  34. data/lib/common/Tree.rb +61 -0
  35. data/lib/common/TreetaggerInterface.rb +303 -0
  36. data/lib/common/headz.rb +338 -0
  37. data/lib/common/option_parser.rb +13 -0
  38. data/lib/common/ruby_class_extensions.rb +310 -0
  39. data/lib/fred/Baseline.rb +150 -0
  40. data/lib/fred/FileZipped.rb +31 -0
  41. data/lib/fred/FredBOWContext.rb +863 -0
  42. data/lib/fred/FredConfigData.rb +182 -0
  43. data/lib/fred/FredConventions.rb +232 -0
  44. data/lib/fred/FredDetermineTargets.rb +324 -0
  45. data/lib/fred/FredEval.rb +312 -0
  46. data/lib/fred/FredFeatureExtractors.rb +321 -0
  47. data/lib/fred/FredFeatures.rb +1061 -0
  48. data/lib/fred/FredFeaturize.rb +596 -0
  49. data/lib/fred/FredNumTrainingSenses.rb +27 -0
  50. data/lib/fred/FredParameters.rb +402 -0
  51. data/lib/fred/FredSplit.rb +84 -0
  52. data/lib/fred/FredSplitPkg.rb +180 -0
  53. data/lib/fred/FredTest.rb +607 -0
  54. data/lib/fred/FredTrain.rb +144 -0
  55. data/lib/fred/PlotAndREval.rb +480 -0
  56. data/lib/fred/fred.rb +45 -0
  57. data/lib/fred/md5.rb +23 -0
  58. data/lib/fred/opt_parser.rb +250 -0
  59. data/lib/frprep/AbstractSynInterface.rb +1227 -0
  60. data/lib/frprep/Ampersand.rb +37 -0
  61. data/lib/frprep/BerkeleyInterface.rb +375 -0
  62. data/lib/frprep/CollinsInterface.rb +1165 -0
  63. data/lib/frprep/ConfigData.rb +694 -0
  64. data/lib/frprep/Counter.rb +18 -0
  65. data/lib/frprep/FNCorpusXML.rb +643 -0
  66. data/lib/frprep/FNDatabase.rb +144 -0
  67. data/lib/frprep/FixSynSemMapping.rb +196 -0
  68. data/lib/frprep/FrPrepConfigData.rb +66 -0
  69. data/lib/frprep/FrameXML.rb +513 -0
  70. data/lib/frprep/FrprepHelper.rb +1324 -0
  71. data/lib/frprep/Graph.rb +345 -0
  72. data/lib/frprep/ISO-8859-1.rb +24 -0
  73. data/lib/frprep/MiniparInterface.rb +1388 -0
  74. data/lib/frprep/Parser.rb +213 -0
  75. data/lib/frprep/RegXML.rb +269 -0
  76. data/lib/frprep/STXmlTerminalOrder.rb +194 -0
  77. data/lib/frprep/SalsaTigerRegXML.rb +2347 -0
  78. data/lib/frprep/SalsaTigerXMLHelper.rb +99 -0
  79. data/lib/frprep/SleepyInterface.rb +384 -0
  80. data/lib/frprep/SynInterfaces.rb +275 -0
  81. data/lib/frprep/TabFormat.rb +720 -0
  82. data/lib/frprep/Tiger.rb +1448 -0
  83. data/lib/frprep/TntInterface.rb +44 -0
  84. data/lib/frprep/Tree.rb +61 -0
  85. data/lib/frprep/TreetaggerInterface.rb +303 -0
  86. data/lib/frprep/do_parses.rb +142 -0
  87. data/lib/frprep/frprep.rb +686 -0
  88. data/lib/frprep/headz.rb +338 -0
  89. data/lib/frprep/one_parsed_file.rb +28 -0
  90. data/lib/frprep/opt_parser.rb +94 -0
  91. data/lib/frprep/ruby_class_extensions.rb +310 -0
  92. data/lib/rosy/AbstractFeatureAndExternal.rb +240 -0
  93. data/lib/rosy/DBMySQL.rb +146 -0
  94. data/lib/rosy/DBSQLite.rb +280 -0
  95. data/lib/rosy/DBTable.rb +239 -0
  96. data/lib/rosy/DBWrapper.rb +176 -0
  97. data/lib/rosy/ExternalConfigData.rb +58 -0
  98. data/lib/rosy/FailedParses.rb +130 -0
  99. data/lib/rosy/FeatureInfo.rb +242 -0
  100. data/lib/rosy/GfInduce.rb +1115 -0
  101. data/lib/rosy/GfInduceFeature.rb +148 -0
  102. data/lib/rosy/InputData.rb +294 -0
  103. data/lib/rosy/RosyConfigData.rb +115 -0
  104. data/lib/rosy/RosyConfusability.rb +338 -0
  105. data/lib/rosy/RosyEval.rb +465 -0
  106. data/lib/rosy/RosyFeatureExtractors.rb +1609 -0
  107. data/lib/rosy/RosyFeaturize.rb +280 -0
  108. data/lib/rosy/RosyInspect.rb +336 -0
  109. data/lib/rosy/RosyIterator.rb +477 -0
  110. data/lib/rosy/RosyPhase2FeatureExtractors.rb +230 -0
  111. data/lib/rosy/RosyPruning.rb +165 -0
  112. data/lib/rosy/RosyServices.rb +744 -0
  113. data/lib/rosy/RosySplit.rb +232 -0
  114. data/lib/rosy/RosyTask.rb +19 -0
  115. data/lib/rosy/RosyTest.rb +826 -0
  116. data/lib/rosy/RosyTrain.rb +232 -0
  117. data/lib/rosy/RosyTrainingTestTable.rb +786 -0
  118. data/lib/rosy/TargetsMostFrequentFrame.rb +60 -0
  119. data/lib/rosy/View.rb +418 -0
  120. data/lib/rosy/opt_parser.rb +379 -0
  121. data/lib/rosy/rosy.rb +77 -0
  122. data/lib/shalmaneser/version.rb +3 -0
  123. data/test/frprep/test_opt_parser.rb +94 -0
  124. data/test/functional/functional_test_helper.rb +40 -0
  125. data/test/functional/sample_experiment_files/fred_test.salsa.erb +122 -0
  126. data/test/functional/sample_experiment_files/fred_train.salsa.erb +135 -0
  127. data/test/functional/sample_experiment_files/prp_test.salsa.erb +138 -0
  128. data/test/functional/sample_experiment_files/prp_test.salsa.fred.standalone.erb +120 -0
  129. data/test/functional/sample_experiment_files/prp_test.salsa.rosy.standalone.erb +120 -0
  130. data/test/functional/sample_experiment_files/prp_train.salsa.erb +138 -0
  131. data/test/functional/sample_experiment_files/prp_train.salsa.fred.standalone.erb +138 -0
  132. data/test/functional/sample_experiment_files/prp_train.salsa.rosy.standalone.erb +138 -0
  133. data/test/functional/sample_experiment_files/rosy_test.salsa.erb +257 -0
  134. data/test/functional/sample_experiment_files/rosy_train.salsa.erb +259 -0
  135. data/test/functional/test_fred.rb +47 -0
  136. data/test/functional/test_frprep.rb +52 -0
  137. data/test/functional/test_rosy.rb +20 -0
  138. metadata +270 -0
@@ -0,0 +1,18 @@
1
# Counter - hands out consecutive ids while remembering where it is.
class Counter

  # init_value: the first id that #next will hand out
  def initialize(init_value)
    @v = init_value
  end

  # Look at the id that the following call to #next would return,
  # without consuming it.
  def get
    @v
  end

  # Advance the counter by one and return the id it held before the step.
  def next
    (@v += 1) - 1
  end

end
@@ -0,0 +1,48 @@
1
+ # DBInterface
2
+ #
3
+ # Okay, things are getting somewhat complicated here with all
4
+ # the DB classes, but this is how it all fits together:
5
+ #
6
+ # - DBWrapper: abstract class describing the DB interface
7
+ # - DBMySQL, DBSQLite: subclasses of DBWrapper, for MySQL
8
+ # and SQLite, respectively
9
+ # - DBInterface: class to be used from outside,
10
+ # decides (based on the experiment file) whether to use
11
+ # MySQL or SQLite and makes an object of the right kind,
12
+ # 'require'-ing either DBMySQL or DBSQLite, but not both,
13
+ # because the right ruby packages might not be installed
14
+ # for both SQL systems
15
+
16
# Create the database backend object selected by the experiment file.
#
# Reads the 'dbtype' entry via exp.get("dbtype") and loads only the
# matching backend file, so the Ruby package for the other SQL system
# does not need to be installed.
#
# Returns: DBMySQL.new(exp) for "mysql",
#          DBSQLite.new(exp, dir, identifier) for "sqlite".
# Any other 'dbtype' value, or a backend that cannot be loaded, prints a
# message to stderr and terminates the program with exit status 1.
def get_db_interface(exp,               # experiment file object with 'dbtype' entry
                     dir = nil,         # string: Shalmaneser directory (used by SQLite only)
                     identifier = nil)  # string: identifier of the data (SQLite)

  case exp.get("dbtype")
  when "mysql"
    begin
      require 'rosy/DBMySQL'
    rescue LoadError, StandardError
      # A failing 'require' raises LoadError, which is a ScriptError and
      # therefore NOT caught by a bare 'rescue'; it has to be named.
      $stderr.puts "Error loading DB interface."
      $stderr.puts "Make sure you have the Ruby MySQL package installed."
      exit 1
    end
    return DBMySQL.new(exp)

  when "sqlite"
    begin
      require 'rosy/DBSQLite'
    rescue LoadError, StandardError
      # see above: LoadError must be rescued explicitly
      $stderr.puts "Error loading DB interface."
      $stderr.puts "Make sure you have the Ruby SQLite package installed."
      exit 1
    end
    return DBSQLite.new(exp, dir, identifier)

  else
    $stderr.puts "Error: database type needs to be either 'mysql' or 'sqlite'."
    $stderr.puts "Please set parameter 'dbtype' in the experiment file accordingly."
    exit 1
  end
end
47
+
48
+
@@ -0,0 +1,27 @@
1
+ ###
2
+ # Enduser mode:
3
+ # no training, use only precompiled classifiers,
4
+ # and remove DB table with test data after applying classifiers
5
+ #
6
+ # The global variable for this, $ENDUSER_MODE, is expected to
7
+ # be set in the main program, e.g. due to some setting in the
8
+ # experiment file.
9
+
10
##
# Enduser-mode guards, added to Object so that they can be called
# from anywhere. Both consult the global variable $ENDUSER_MODE.
class Object

  ##
  # If in enduser mode, the given condition must be true;
  # otherwise a notice is printed to stderr and execution ends
  # (exit status 0). Outside enduser mode nothing happens.
  def in_enduser_mode_ensure(condition)
    return unless $ENDUSER_MODE
    return if condition

    $stderr.puts "Sorry, this service is unavailable in enduser mode."
    exit 0
  end

  ##
  # If in enduser mode, end execution unconditionally.
  def in_enduser_mode_unavailable()
    in_enduser_mode_ensure(false)
  end
end
@@ -0,0 +1,480 @@
1
+ # Eval
2
+ # Katrin Erk May 05
3
+ #
4
+ # Evaluate classification results
5
+ # abstract class, has to be instantiated
6
+ # to something that can read in
7
+ # task-specific input data
8
+ #
9
+ # the Eval class provides access methods to all the
10
+ # individual evaluation results and allows for a flag that
11
+ # suppresses evaluation output to a file
12
+
13
+ require "common/ruby_class_extensions"
14
+
15
+ class Eval
16
+
17
+ # prec_group_class, rec_group_class, f_group_class:
18
+ # values for each group/class pair
19
+ # hashes "group class"(string) => score(float)
20
+ attr_reader :prec_group_class, :rec_group_class, :f_group_class
21
+
22
+ # accuracy_group:
23
+ # micro-averaged values for each group
24
+ # hash group(string) => score(float)
25
+ attr_reader :accuracy_group
26
+
27
+ # prec, rec, f, accuracy: float
28
+ # micro-averaged overall values
29
+ attr_reader :prec, :rec, :f, :accuracy
30
+
31
###
# new
#
# outfilename: name of the file that evaluation results are printed to.
#    nil: print_evaluation_result() will not write anything
#
# logfilename: name of the file that instance-wise results are printed to.
#    nil: no logfile output
#
# consider_only_one_class:
#    class label (string) to restrict the evaluation to.
#    If given, overall precision/recall/f-score is computed
#    instead of just accuracy, and no group-wise evaluation is done.
#    nil: consider all classes.
def initialize(outfilename = nil,             # string/nil
               logfilename = nil,             # string/nil
               consider_only_one_class = nil) # string/nil: evaluate only one class?

  @outfilename = outfilename
  @consider_only_one_class = consider_only_one_class

  # instance-wise logging is switched on exactly when a log file name was given;
  # @logfilename is only set in that case
  @print_log = logfilename ? true : false
  @logfilename = logfilename if @print_log

  ###
  # counts, keyed by [group, class] pairs, defaulting to 0:
  #  @num_assigned: how often the classifier assigned this pair
  #  @num_gold:     how often the gold standard contains this pair
  #  @num_truepos:  how often the classifier assigned this pair correctly
  @num_assigned, @num_truepos, @num_gold = Hash.new(0), Hash.new(0), Hash.new(0)

  # group(string) => number of instances seen for that group
  @num_instances = Hash.new(0)

  # scores per [group, class] pair, defaulting to 0.0
  @prec_group_class = Hash.new(0.0)
  @rec_group_class = Hash.new(0.0)
  @f_group_class = Hash.new(0.0)

  # accuracy per group, defaulting to 0.0
  @accuracy_group = Hash.new(0.0)

  # overall scores
  @prec = 0.0
  @rec = 0.0
  @f = 0.0
  @accuracy = 0.0
end
92
+
93
###
# compute
#
# Do the evaluation:
#  1. walk through every instance of every group (each_group / each_instance)
#     and count gold labels, assigned labels and matches,
#  2. derive precision/recall/f-score per [group, class] pair,
#  3. derive accuracy per group (only if all classes are evaluated),
#  4. derive overall scores: precision/recall/f-score if
#     @consider_only_one_class is set, accuracy otherwise,
#     and report them on stderr,
#  5. optionally write everything to the result file.
#
# The instance-wise log file (if any) is closed even when an error
# interrupts the evaluation.
#
# Returns nil.
def compute(printme = true) # boolean: print evaluation results to file?

  start_printlog()

  begin
    # hash: group => value(integer): number of true positives for a group
    num_truepos_group = Hash.new
    # integers: overall assigned/gold/truepos/instances
    num_assigned_all = 0
    num_gold_all = 0
    num_truepos_all = 0
    num_instances_all = 0

    ###
    # iterate through all groups and their instances,
    # record correct/incorrect assignments
    each_group { |group|

      # each instance is a pair of gold class and assigned class
      each_instance(group) { |goldclass, assigned_class|

        # replace spaces in the group name by underscores;
        # all count hashes are keyed with this normalized name.
        # (Counts are keyed by [group, class] arrays, so the group
        # name is never recovered by splitting a string.)
        mygroup = group.gsub(/ /, "_")

        print_log(mygroup + " gold: " + goldclass.to_s + " " + "assigned: " + assigned_class.to_s)

        # record instance
        @num_instances[mygroup] += 1

        # record gold standard class, unless it is missing, empty or "-"
        if goldclass and not(goldclass.empty?) and goldclass != "-"
          @num_gold[[mygroup, goldclass]] += 1
        end

        # record assigned class, unless it is missing, empty or "-"
        if assigned_class and not(assigned_class.empty?) and assigned_class != "-"
          @num_assigned[[mygroup, assigned_class]] += 1
        end

        # gold class equal to assigned class: count this as a match.
        # NOTE(review): this also counts a match when both are nil, empty
        # or "-", although neither @num_gold nor @num_assigned is
        # incremented in that case - confirm this is intended.
        if goldclass == assigned_class
          @num_truepos[[mygroup, assigned_class]] += 1

          print_log(" => correct\n")

        elsif assigned_class.nil? or assigned_class.empty? or assigned_class == "-"
          print_log(" => unassigned\n")

        else
          print_log(" => incorrect\n")
        end
      } # each instance for this group
    } # all groups


    ####
    # compute precision, recall, f-score

    # map each group to its classes.
    # groups: array of strings (only groups that have gold labels)
    # group_classes: hash group(string) -> array of classes(strings)
    # if @consider_only_one_class has been set, only that class will be listed
    groups = @num_gold.keys.map { |group, tclass| group }.uniq.sort
    group_classes = Hash.new

    # for all group/class pairs occurring either in the gold data or
    # the classifier output: record it in the group_classes hash
    # (Hash#keys returns a fresh array, so concat does not touch @num_gold)
    (@num_gold.keys.concat @num_assigned.keys).each { |group, tclass|
      if group_classes[group].nil?
        group_classes[group] = Array.new
      end
      if @consider_only_one_class and
          tclass != @consider_only_one_class
        # we are computing results for only one target class,
        # and this is not it
        next
      end
      if tclass
        group_classes[group] << tclass
      end
    }
    group_classes.each_key { |group|
      group_classes[group] = group_classes[group].uniq.sort
    }


    # precision, recall, f for each group/class pair
    groups.each { |group|
      if group_classes[group].nil?
        next
      end

      # iterate through all classes of the group
      group_classes[group].each { |tclass|

        key = [group, tclass]

        # compute precision, recall, f-score
        @prec_group_class[key], @rec_group_class[key], @f_group_class[key] =
          prec_rec_f(@num_assigned[key], @num_gold[key], @num_truepos[key])
      }
    }


    # accuracy for each group
    if @consider_only_one_class
      # we are computing results for only one target class,
      # so precision/recall/f-score group-wise would be
      # exactly the same as group+class-wise.
    else
      groups.each { |group|
        # sum true positives over all target classes of the group
        num_truepos_group[group] = @num_truepos.keys.big_sum(0) { |othergroup, tclass|
          if othergroup == group
            @num_truepos[[othergroup, tclass]]
          else
            0
          end
        }

        @accuracy_group[group] = accuracy(num_truepos_group[group], @num_instances[group])
      }
    end


    # overall values:
    if @consider_only_one_class
      # we are computing results for only one target class,
      # so overall precision/recall/f-score (micro-average) make sense

      # sum the counts over all groups,
      # but only include the target class we are interested in
      num_assigned_all, num_gold_all, num_truepos_all = [@num_assigned, @num_gold, @num_truepos].map { |hash|
        hash.keys.big_sum(0) { |group, tclass|
          if tclass == @consider_only_one_class
            hash[[group, tclass]]
          else
            0
          end
        }
      }

      @prec, @rec, @f = prec_rec_f(num_assigned_all, num_gold_all, num_truepos_all)

      # stderr output of global results
      $stderr.print "Overall result: prec: ", sprintf("%.4f", @prec)
      $stderr.print " rec: ", sprintf("%.4f", @rec)
      $stderr.print " f: ", sprintf("%.4f", @f), "\n"

    else
      # we are computing results for all classes,
      # so use accuracy instead of precision/recall/f-score
      num_truepos_all, num_instances_all = [@num_truepos, @num_instances].map { |hash|
        hash.keys.big_sum(0) { |key| hash[key] }
      }
      @accuracy = accuracy(num_truepos_all, num_instances_all)
      # stderr output of global results
      $stderr.print "Overall result: accuracy ", sprintf("%.4f", @accuracy), "\n"
    end

    ###
    # print precision, recall, f-score to file
    # (optional)
    if printme
      print_evaluation_result(groups, group_classes, num_truepos_group, num_instances_all, num_assigned_all, num_gold_all, num_truepos_all)
    end

  ensure
    # close the log file on success and on failure alike,
    # so that an error during evaluation does not leak the handle
    end_printlog()
  end

  nil
end
272
+
273
+ #####
274
+ protected
275
+
276
+
277
###
# inject_gold_counts
#
# deal with instances that failed preprocessing:
# add more gold labels that occur in the missing instances.
# These are added to @num_gold, so they lower recall.
#
# group:  string: group name
# tclass: string: gold class label
# count:  integer: number of additional gold occurrences
#
# The entry is stored under the same kind of key that compute() uses:
# an array [group, class] in which spaces in the group name have been
# replaced by underscores. (A "group class" string key would never be
# found again by the code that reads @num_gold.)
#
# returns: the updated count for this group/class pair
def inject_gold_counts(group, tclass, count)
  @num_gold[[group.gsub(/ /, "_"), tclass]] += count
end
287
+
288
###
# print log? if so, start logfile
#
# If logging is enabled (@print_log), open @logfilename for writing,
# keep the handle in @logfile and announce the file on stderr.
# Otherwise set @logfile to nil, which turns print_log() into a no-op.
#
# raises: RuntimeError if the log file cannot be opened; the message
# names the file and the underlying cause.
def start_printlog()
  unless @print_log
    @logfile = nil
    return
  end

  begin
    @logfile = File.new(@logfilename, "w")
    $stderr.puts "Writing evaluation log to " + @logfilename
  rescue => e
    # keep the established message prefix, but say which file and why
    raise "Couldn't write to eval logfile " + @logfilename.to_s + ": " + e.message
  end
end
303
+
304
###
# print log? if so, end logfile
#
# Closes @logfile when logging is enabled (@print_log);
# does nothing otherwise.
def end_printlog()
  return unless @print_log

  @logfile.close()
end
311
+
312
###
# print log? If so, print this string to the logfile
# (no newline added).
# Without an open logfile (@logfile is nil) nothing happens.
def print_log(string) # string to be printed
  return unless @logfile

  @logfile.print(string)
end
320
+
321
###
# each_group
#
# yield each group name in turn.
# Abstract here: this version always raises a RuntimeError.
def each_group()
  raise RuntimeError, "Abstract, please instantiate"
end
328
+
329
###
# each_instance
#
# given a group name, yield each instance of this group in turn,
# as a pair [gold_class(string), assigned_class(string)].
# Abstract here: this version always raises a RuntimeError.
def each_instance(group) # string: group name
  raise RuntimeError, "Abstract, please instantiate"
end
337
+
338
###
# print_evaluation_result
#
# Write the evaluation to the file @outfilename, in three sections:
#  - precision/recall/f-score for each group/target class pair,
#  - accuracy for each group (skipped if @consider_only_one_class is set),
#  - overall result: precision/recall/f-score if @consider_only_one_class
#    is set, accuracy otherwise.
#
# If @outfilename is nil, a warning is printed to stderr and nothing is written.
#
# raises: RuntimeError if the output file cannot be opened.
# returns: nil. The output file is always closed before returning,
#   also when writing fails half-way.
def print_evaluation_result(groups,            # array:string: group names
                            group_classes,     # hash: group(string) => target classes (array:string)
                            num_truepos_group, # hash: group(string) => num true positives(integer)
                            num_instances_all, num_assigned_all, num_gold_all, num_truepos_all) # integers
  if @outfilename.nil?
    $stderr.puts "Warning: Can't print evaluation results, got no outfile name."
    return
  end

  begin
    outfile = File.new(@outfilename, "w")
  rescue
    raise "Couldn't write to eval file " + @outfilename
  end

  begin
    # print out precision, recall, f-score for each group/class pair
    outfile.puts "-----------------------------"
    outfile.puts "Evaluation per group/target class pair"
    outfile.puts "-----------------------------"

    # iterate through all groups
    groups.each { |group|
      if group_classes[group].nil?
        next
      end

      outfile.puts "=============="
      outfile.puts group

      # iterate through all classes of the group
      group_classes[group].each { |tclass|

        key = [group, tclass]

        outfile.print tclass, "\t", "prec: ", sprintf("%.4f", @prec_group_class[key])
        outfile.print " (", @num_truepos[key], "/", @num_assigned[key], ")"

        outfile.print "\trec: ", sprintf("%.4f", @rec_group_class[key])
        outfile.print " (", @num_truepos[key], "/", @num_gold[key], ")"

        outfile.print "\tfscore: ", sprintf("%.4f", @f_group_class[key]), "\n"
      }
    }

    # print out evaluation for each group
    unless @consider_only_one_class
      outfile.puts
      outfile.puts "-----------------------------"
      outfile.puts "Evaluation per group"
      outfile.puts "-----------------------------"

      # iterate through all groups
      groups.each { |group|

        # accuracy of the group
        outfile.print group, "\t", "accuracy: ", sprintf("%.4f", @accuracy_group[group]),
                      " (", num_truepos_group[group], "/", @num_instances[group], ")\n"
      }
    end

    # print out overall evaluation
    outfile.puts
    outfile.puts "-----------------------------"
    outfile.puts "Overall evaluation"
    outfile.puts "-----------------------------"

    if @consider_only_one_class

      # overall precision, recall, f-score
      outfile.print "prec: ", sprintf("%.4f", @prec)
      outfile.print " (", num_truepos_all, "/", num_assigned_all, ")"

      outfile.print "\trec: ", sprintf("%.4f", @rec)
      outfile.print " (", num_truepos_all, "/", num_gold_all, ")"

      outfile.print "\tfscore: ", sprintf("%.4f", @f), "\n"

    else

      # overall accuracy
      outfile.print "accuracy: ", sprintf("%.4f", @accuracy)
      outfile.print " (", num_truepos_all, "/", num_instances_all, ")\n"
    end
  ensure
    # closing flushes the buffer and releases the file handle
    outfile.close()
  end

  nil
end
432
+
433
###
# method prec_rec_f
# assigned, gold, truepos: counts(integers)
#
# compute precision, recall, f-score:
#
# precision: true positives / assigned positives
# recall:    true positives / gold positives
# f-score:   2*precision*recall / (precision + recall)
#
# Any score whose denominator is zero is reported as 0.0.
# This covers 0/0 (NaN) as well as x/0 with x != 0 (Infinity),
# so the result never contains a non-finite float.
#
# return: [precision, recall, f-score] as floats
def prec_rec_f(assigned, gold, truepos)
  # precision
  precision = truepos.to_f / assigned.to_f
  precision = 0.0 unless precision.finite?

  # recall
  recall = truepos.to_f / gold.to_f
  recall = 0.0 unless recall.finite?

  # fscore
  fscore = (2 * precision * recall) / (precision + recall)
  fscore = 0.0 unless fscore.finite?

  return [precision, recall, fscore]
end
465
+
466
###
# accuracy:
#
# accuracy = true positives / instances
#
# truepos, num_inst: counts (converted with to_f)
#
# returns: accuracy, a float; 0.0 if the division yields NaN
def accuracy(truepos, num_inst)
  ratio = truepos.to_f / num_inst.to_f
  ratio.nan? ? 0.0 : ratio
end
480
+ end