shalmaneser 1.2.0.rc4 → 1.2.rc5

Sign up to get free protection for your applications and to get access to all the features.
Files changed (115) hide show
  1. checksums.yaml +4 -4
  2. data/README.md +47 -18
  3. data/bin/shalmaneser +8 -2
  4. data/doc/index.md +1 -0
  5. data/lib/shalmaneser/opt_parser.rb +68 -67
  6. metadata +49 -119
  7. data/bin/fred +0 -16
  8. data/bin/frprep +0 -34
  9. data/bin/rosy +0 -17
  10. data/lib/common/AbstractSynInterface.rb +0 -1229
  11. data/lib/common/Counter.rb +0 -18
  12. data/lib/common/EnduserMode.rb +0 -27
  13. data/lib/common/Eval.rb +0 -480
  14. data/lib/common/FixSynSemMapping.rb +0 -196
  15. data/lib/common/Graph.rb +0 -345
  16. data/lib/common/ISO-8859-1.rb +0 -24
  17. data/lib/common/ML.rb +0 -186
  18. data/lib/common/Mallet.rb +0 -236
  19. data/lib/common/Maxent.rb +0 -229
  20. data/lib/common/Optimise.rb +0 -195
  21. data/lib/common/Parser.rb +0 -213
  22. data/lib/common/RegXML.rb +0 -269
  23. data/lib/common/RosyConventions.rb +0 -171
  24. data/lib/common/STXmlTerminalOrder.rb +0 -194
  25. data/lib/common/SalsaTigerRegXML.rb +0 -2347
  26. data/lib/common/SalsaTigerXMLHelper.rb +0 -99
  27. data/lib/common/SynInterfaces.rb +0 -282
  28. data/lib/common/TabFormat.rb +0 -721
  29. data/lib/common/Tiger.rb +0 -1448
  30. data/lib/common/Timbl.rb +0 -144
  31. data/lib/common/Tree.rb +0 -61
  32. data/lib/common/config_data.rb +0 -470
  33. data/lib/common/config_format_element.rb +0 -220
  34. data/lib/common/headz.rb +0 -338
  35. data/lib/common/option_parser.rb +0 -13
  36. data/lib/common/prep_config_data.rb +0 -62
  37. data/lib/common/prep_helper.rb +0 -1330
  38. data/lib/common/ruby_class_extensions.rb +0 -310
  39. data/lib/db/db_interface.rb +0 -48
  40. data/lib/db/db_mysql.rb +0 -145
  41. data/lib/db/db_sqlite.rb +0 -280
  42. data/lib/db/db_table.rb +0 -239
  43. data/lib/db/db_wrapper.rb +0 -176
  44. data/lib/db/sql_query.rb +0 -243
  45. data/lib/ext/maxent/Classify.class +0 -0
  46. data/lib/ext/maxent/Train.class +0 -0
  47. data/lib/fred/Baseline.rb +0 -150
  48. data/lib/fred/FileZipped.rb +0 -31
  49. data/lib/fred/FredBOWContext.rb +0 -877
  50. data/lib/fred/FredConventions.rb +0 -232
  51. data/lib/fred/FredDetermineTargets.rb +0 -319
  52. data/lib/fred/FredEval.rb +0 -312
  53. data/lib/fred/FredFeatureExtractors.rb +0 -322
  54. data/lib/fred/FredFeatures.rb +0 -1061
  55. data/lib/fred/FredFeaturize.rb +0 -602
  56. data/lib/fred/FredNumTrainingSenses.rb +0 -27
  57. data/lib/fred/FredParameters.rb +0 -402
  58. data/lib/fred/FredSplit.rb +0 -84
  59. data/lib/fred/FredSplitPkg.rb +0 -180
  60. data/lib/fred/FredTest.rb +0 -606
  61. data/lib/fred/FredTrain.rb +0 -144
  62. data/lib/fred/PlotAndREval.rb +0 -480
  63. data/lib/fred/fred.rb +0 -47
  64. data/lib/fred/fred_config_data.rb +0 -185
  65. data/lib/fred/md5.rb +0 -23
  66. data/lib/fred/opt_parser.rb +0 -250
  67. data/lib/frprep/Ampersand.rb +0 -39
  68. data/lib/frprep/CollinsInterface.rb +0 -1165
  69. data/lib/frprep/Counter.rb +0 -18
  70. data/lib/frprep/FNCorpusXML.rb +0 -643
  71. data/lib/frprep/FNDatabase.rb +0 -144
  72. data/lib/frprep/FrameXML.rb +0 -513
  73. data/lib/frprep/Graph.rb +0 -345
  74. data/lib/frprep/MiniparInterface.rb +0 -1388
  75. data/lib/frprep/RegXML.rb +0 -269
  76. data/lib/frprep/STXmlTerminalOrder.rb +0 -194
  77. data/lib/frprep/SleepyInterface.rb +0 -384
  78. data/lib/frprep/TntInterface.rb +0 -44
  79. data/lib/frprep/TreetaggerInterface.rb +0 -327
  80. data/lib/frprep/do_parses.rb +0 -143
  81. data/lib/frprep/frprep.rb +0 -693
  82. data/lib/frprep/interfaces/berkeley_interface.rb +0 -372
  83. data/lib/frprep/interfaces/stanford_interface.rb +0 -353
  84. data/lib/frprep/interpreters/berkeley_interpreter.rb +0 -22
  85. data/lib/frprep/interpreters/stanford_interpreter.rb +0 -22
  86. data/lib/frprep/one_parsed_file.rb +0 -28
  87. data/lib/frprep/opt_parser.rb +0 -94
  88. data/lib/frprep/ruby_class_extensions.rb +0 -310
  89. data/lib/rosy/AbstractFeatureAndExternal.rb +0 -242
  90. data/lib/rosy/ExternalConfigData.rb +0 -58
  91. data/lib/rosy/FailedParses.rb +0 -130
  92. data/lib/rosy/FeatureInfo.rb +0 -242
  93. data/lib/rosy/GfInduce.rb +0 -1115
  94. data/lib/rosy/GfInduceFeature.rb +0 -148
  95. data/lib/rosy/InputData.rb +0 -294
  96. data/lib/rosy/RosyConfusability.rb +0 -338
  97. data/lib/rosy/RosyEval.rb +0 -465
  98. data/lib/rosy/RosyFeatureExtractors.rb +0 -1609
  99. data/lib/rosy/RosyFeaturize.rb +0 -281
  100. data/lib/rosy/RosyInspect.rb +0 -336
  101. data/lib/rosy/RosyIterator.rb +0 -478
  102. data/lib/rosy/RosyPhase2FeatureExtractors.rb +0 -230
  103. data/lib/rosy/RosyPruning.rb +0 -165
  104. data/lib/rosy/RosyServices.rb +0 -744
  105. data/lib/rosy/RosySplit.rb +0 -232
  106. data/lib/rosy/RosyTask.rb +0 -19
  107. data/lib/rosy/RosyTest.rb +0 -829
  108. data/lib/rosy/RosyTrain.rb +0 -234
  109. data/lib/rosy/RosyTrainingTestTable.rb +0 -787
  110. data/lib/rosy/TargetsMostFrequentFrame.rb +0 -60
  111. data/lib/rosy/View.rb +0 -418
  112. data/lib/rosy/opt_parser.rb +0 -379
  113. data/lib/rosy/rosy.rb +0 -78
  114. data/lib/rosy/rosy_config_data.rb +0 -121
  115. data/lib/shalmaneser/version.rb +0 -3
@@ -1,465 +0,0 @@
1
- # RosyEval
2
- # KE May 05
3
- #
4
- # Evaluation for Rosy:
5
- # Precision, Recall, F-score
6
- # Output to evaluation file,
7
- # plus optional output of evaluation log file.
8
- #
9
- # Builds on the general Salsa Eval package
10
-
11
- # Salsa packages
12
- require "common/Eval"
13
- require "common/ruby_class_extensions"
14
-
15
- # Rosy packages
16
- require "rosy/RosyIterator"
17
- require "rosy/RosySplit"
18
- require "rosy/RosyTask"
19
- require "rosy/RosyPruning"
20
-
21
- # Frprep packages
22
- require "common/prep_config_data"
23
-
24
#######################################################################
# RosyEval: Rosy-specific subclass of the general evaluation class Eval.
#
# It supplies the two iteration hooks each_group and each_instance, reading
# gold and assigned labels from database views obtained via a RosyIterator.
#
# step: can be argrec, arglab, onestep, as usual, but also
# - "all":
#    evaluate argrec and arglab together.
#    When argrec == NONE, use the argrec value, else use the arglab value
# - "prune":
#    evaluate the pruning column as if it were an argrec assignment
#
# When step == argrec or prune, evaluate _only_ the target class FE
# (passed as third argument to Eval's constructor).
# Otherwise, evaluate all target classes.
class RosyEval < Eval
  # Prepare one evaluation run.
  #
  # Side effects visible in this method: progress messages on $stderr,
  # adjoining the preprocessing experiment data to +exp+ (unless suppressed),
  # and `exit 1` when the preprocessing experiment file is not configured or
  # not readable, or when no classifier run column can be determined.
  # Raises a RuntimeError for step "prune" if pruning is not enabled.
  def initialize(exp,         # RosyConfigData object: experiment file
                 ttt_obj,     # RosyTrainingTestTable object
                 step,        # string: argrec, arglab, onestep, all, prune
                 splitID,     # string: splitlog ID, or nil
                 testID,      # string: test ID, or nil
                 outfilename, # string: name of file to print output to
                 logfilename, # string: name of file to print eval log to (may be nil)
                 dont_adjoin_frprep_exp) # string: if non-nil, don't re-adjoin frprep experiment obj
    @exp = exp
    @step = step

    $stderr.puts "Rosy evaluation: printing results to " + outfilename if outfilename
    $stderr.puts "and printing an evaluation log to " + logfilename if logfilename

    ##
    # add preprocessing information to the experiment file object
    unless dont_adjoin_frprep_exp
      # split data is described by the training preprocessing file,
      # test data by the test preprocessing file
      preproc_param = splitID ? "preproc_descr_file_train" : "preproc_descr_file_test"
      preproc_expname = @exp.get(preproc_param)

      if !preproc_expname
        $stderr.puts "Please set the name of the preprocessing exp. file name"
        $stderr.puts "in the experiment file."
        exit 1
      elsif !File.readable?(preproc_expname)
        # name the parameter that was actually consulted, not always the
        # training one
        $stderr.puts "Error in the experiment file:"
        $stderr.puts "Parameter #{preproc_param} has to be a readable file."
        exit 1
      end
      @exp.adjoin(FrPrepConfigData.new(preproc_expname))
    end

    ##
    # evaluate which labels?
    if ["argrec", "prune"].include?(@step)
      # evaluate only the label "FE"
      super(outfilename, logfilename, "FE")
    else
      # evaluate all target classes
      super(outfilename, logfilename)
    end

    ##
    # what are classifier columns?
    case @step
    when "all"
      # read one argrec and one arglab classifier run column
      @classif_column_argrec = ttt_obj.existing_runlog("argrec", "test", testID, splitID)
      @classif_column_arglab = ttt_obj.existing_runlog("arglab", "test", testID, splitID)
      @columns = ["gold", @classif_column_argrec, @classif_column_arglab]

      if @classif_column_argrec.nil? || @classif_column_arglab.nil?
        exit_unknown_run(ttt_obj, testID, splitID)
      end

    when "prune"
      # read pruning column, evaluate as a kind of argrec assignment
      unless Pruning.prune?(@exp)
        raise "Error: Pruning evaluation without pruning column. Skipping."
      end
      @classif_column = Pruning.colname(@exp)
      @columns = ["gold", @classif_column]

    else
      # read the classifier run column for the current step
      @classif_column = ttt_obj.existing_runlog(@step, "test", testID, splitID)
      @columns = ["gold", @classif_column]

      exit_unknown_run(ttt_obj, testID, splitID) if @classif_column.nil?
    end

    ##
    # make object for iterating through groups and making views
    case @step
    when "all"
      # all: no step in particular
      @iterator = RosyIterator.new(ttt_obj, exp, "test",
                                   "step" => nil,
                                   "testID" => testID,
                                   "splitID" => splitID,
                                   "xwise" => "frame")
    when "prune"
      # prune: use argrec
      @iterator = RosyIterator.new(ttt_obj, exp, "test",
                                   "step" => "argrec",
                                   "testID" => testID,
                                   "splitID" => splitID)
    else
      # use the given step
      @iterator = RosyIterator.new(ttt_obj, exp, "test",
                                   "step" => @step,
                                   "testID" => testID,
                                   "splitID" => splitID)
    end

    ##
    # xwise
    if @step == "all"
      # argrec and arglab may have different xwises,
      # which would create trouble.
      # just use "frame" instead
      @xwise = ["frame"]
    else
      # evaluate as you have trained and tested
      @xwise = @iterator.get_xwise_column_names()
    end

    ##
    # split? then include FE labels from unparsed sentences
    # in count of gold labels
    if splitID
      # get a FailedParses object for this split
      @failed_parses_split = FailedParses.new()
      fp_filename = File.new_filename(@exp.instantiate("rosy_dir",
                                                       "exp_ID" => @exp.get("experiment_ID")),
                                      @exp.instantiate("failed_file",
                                                       "exp_ID" => @exp.get("experiment_ID"),
                                                       "split_ID" => splitID,
                                                       "dataset" => "test"))
      @failed_parses_split.load(fp_filename)
    end

    # announce the task
    $stderr.puts "---------"
    $stderr.print "Rosy experiment #{@exp.get("experiment_ID")}: Evaluating "
    if splitID
      $stderr.puts "on split dataset #{splitID}"
    else
      $stderr.puts "on test dataset #{testID}"
    end
    $stderr.puts "---------"
  end

  ###
  protected

  ###
  # each_group
  #
  # yield each group name in turn.
  #
  # While a group is being yielded, @view holds the view for that group
  # (columns: @columns); the view is closed when the block returns or raises.
  # If failed parses were loaded (split evaluation), the gold label counts of
  # unparsed sentences are injected via inject_gold_counts before yielding.
  def each_group()
    @view = nil

    # for the sake of the failed parses module:
    # it can split the failed parses by frame, target and target_pos,
    # but if our "xwise" splits the data along any further columns,
    # the failed parses module cannot know how to split up its failed parses.
    # so see whether we've got any column names besides the three named above
    # in our xwise,
    # and if so, count the groups and split the failed parses evenly between them
    normal_xwise_cols = ["frame", "target", "target_pos"] & @xwise
    extra_xwise_cols = @xwise - normal_xwise_cols

    # num_groups_for_normalxwise: hash: normal_xwise_values(string) -> num. of
    # groups with these normal xwise values(integer)
    # (key format: see normal_xwise_key)
    num_groups_for_normalxwise = Hash.new(0)

    unless extra_xwise_cols.empty?
      # we do have extra columns:
      # for each value sequence for normal_xwise_cols, find out how many
      # groups (i.e. values of the extra xwise columns) there are
      @iterator.each_group() { |group_descr_hash, _group_name|
        num_groups_for_normalxwise[normal_xwise_key(normal_xwise_cols, group_descr_hash)] += 1
      }
    end

    @iterator.each_group() { |group_descr_hash, group_name|
      $stderr.puts group_name if @exp.get("verbose")

      # construct view for the current group
      @view = @iterator.get_a_view_for_current_group(@columns)

      begin
        ##
        # get counts of FE labels from unparsed sentences:

        # first take apart the group label to find
        # the frame name, target name, target POS name in this group
        # (all but one may be nil)
        frame = target = target_pos = nil

        # get a description of this group, array of pairs [column name, value]
        # where column name is the name of one database column
        @xwise.interleave(group_name.split()).each { |col_name, col_value|
          case col_name
          when "frame"
            frame = col_value
          when "target"
            target = col_value
          when "target_pos"
            target_pos = col_value
          else
            # additional database columns: handled below
          end
        }

        # do we have additional column names in "xwise",
        # besides 'frame', 'target', 'target_pos'?
        if extra_xwise_cols.empty?
          split_between_groups = 1
        else
          key = normal_xwise_key(normal_xwise_cols, group_descr_hash)
          split_between_groups = num_groups_for_normalxwise[key]

          # sanity check: every group was counted in the first pass
          raise "shouldn't be here" if split_between_groups == 0
        end

        # failed_fes returns: hash that maps FE names [String] onto numbers of failed FEs [Int]
        if @failed_parses_split
          @failed_parses_split.failed_fes(frame, target, target_pos).each_pair { |fe, count|
            # add this number of gold labels we failed to find
            # to the number of gold labels that Eval counts

            # if argrec, map all non-NONE FEs to "FE"
            # NOTE(review): step "prune" also evaluates only "FE" but is not
            # mapped here -- confirm whether that is intended.
            label = fe
            label = "FE" if @step == "argrec" && fe != @exp.get("noval")

            # share the failed FEs evenly between the groups that differ
            # only in their extra xwise columns
            inject_gold_counts(group_name, label,
                               (count.to_f / split_between_groups.to_f).round)
          }
        end

        # yield the name of the group to the Eval object for evaluation
        yield group_name
      ensure
        # release the view even if evaluation of this group fails
        @view.close()
      end
    }
  end

  ###
  # each_instance
  #
  # given a group name, yield each instance of this group in turn,
  # or rather: yield pairs [gold_class(string), assigned_class(string)]
  #
  # this method depends on each_group() having been called before and
  # having initialized @view to the right view object
  def each_instance(group) # string: group name
    case @step
    when "all"
      # step "all":
      # if the argrec label is "NONE", use that as the assigned label.
      # else use the arglab-label
      noval = @exp.get("noval")
      @view.each_hash { |row|
        if row[@classif_column_argrec] == noval
          yield [row["gold"], row[@classif_column_argrec]]
        else
          yield [row["gold"], row[@classif_column_arglab]]
        end
      }

    when "prune"
      # step "prune":
      # if the pruning column has entry 1, regard as assignment "FE",
      # else regard as assignment "NONE".
      # NOTE(review): compares against the string "1" -- assumes the view
      # returns column values as strings; confirm.
      noval = @exp.get("noval")
      @view.each_hash { |row|
        yield [row["gold"], row[@classif_column] == "1" ? "FE" : noval]
      }

    else
      # argrec, arglab, onestep:
      # just yield pairs [goldlabel, classif_column_label]
      # as given in the view
      @view.each_hash { |row|
        yield [row["gold"], row[@classif_column]]
      }
    end
  end

  ###
  private

  # Build the hash key identifying a combination of "normal" xwise values:
  # strings <col_name>=<value> joined by commas, column names in
  # alphabetical order.
  def normal_xwise_key(normal_xwise_cols, # array:string: column names
                       group_descr_hash)  # hash: column name -> value for the current group
    normal_xwise_cols.sort.map { |col_name|
      col_name + "=" + group_descr_hash[col_name]
    }.join(",")
  end

  # Report on $stderr that no unique classifier run matched the given
  # test/split IDs, list the known runs, and terminate with exit status 1.
  def exit_unknown_run(ttt_obj, # RosyTrainingTestTable object
                       testID,  # string: test ID, or nil
                       splitID) # string: splitlog ID, or nil
    $stderr.puts "Couldn't determine the run to evaluate."
    $stderr.puts "There were either none or too many possible runs given your specification.\n"
    $stderr.puts "Here is a list of all runs the system knows for this experiment ID:\n\n"
    $stderr.puts ttt_obj.runlog_to_s("test", testID, splitID)
    exit 1
  end
end
352
-
353
###########################################################
# This is the class to be called from rosy.rb:
# the Rosy task that evaluates classifier runs via RosyEval.
###########################################################
class RosyEvalTask < RosyTask
  # Read the runtime options relevant to evaluation.
  #
  # Recognized options: "--step" (argrec, arglab, both, onestep; default
  # "both"), "--logID" (split ID), "--testID" (test ID; defaults to
  # default_test_ID()). Any other option is ignored here.
  # Raises a RuntimeError on an unknown step.
  def initialize(exp,     # RosyConfigData object: experiment description
                 opts,    # hash: runtime argument option (string) -> value (string)
                 ttt_obj) # RosyTrainingTestTable object
    #####
    # In enduser mode, this whole task is unavailable
    in_enduser_mode_unavailable()

    @exp = exp
    @ttt_obj = ttt_obj

    ##
    # defaults, possibly overridden by the runtime options below
    @step = "both"
    @splitID = nil
    @testID = default_test_ID()

    opts.each do |option, value|
      case option
      when "--step"
        unless %w[argrec arglab both onestep].include?(value)
          raise "Classification step must be one of: argrec, arglab, both, onestep. I got: #{value}"
        end
        @step = value
      when "--logID"
        @splitID = value
      when "--testID"
        @testID = value
      end
      # anything else is an option that is okay but has already been
      # read and used by rosy.rb
    end
  end

  # Run the evaluation(s) for the requested step.
  #
  # If pruning is enabled and the step is both, argrec or onestep, the
  # pruning column is evaluated first. Step "both" then evaluates argrec
  # followed by arglab; any other step is evaluated on its own.
  def perform()
    requested_step = @step
    skip_adjoin = nil

    if %w[both argrec onestep].include?(requested_step) && Pruning.prune?(@exp)
      # evaluate pruning
      $stderr.puts "Rosy evaluating pruning"
      @step = "prune"
      perform_aux()
      # a non-nil value is handed on to later runs so that RosyEval does not
      # adjoin the frprep experiment data again
      skip_adjoin = "dont_adjoin_frprep_exp"
    end

    unless requested_step == "both"
      # not both? then just do one
      @step = requested_step
      return perform_aux(skip_adjoin)
    end

    # both? then do first argrec, then arglab
    $stderr.puts "Rosy evaluating step argrec"
    @step = "argrec"
    perform_aux(skip_adjoin)

    # KE Jan 30, 2006: evaluation "all" deactivated until we've
    # figured out how to evaluate accuracy for the NONE class
    # $stderr.puts "Rosy overall evaluation"
    # @step = "all"
    # perform_aux("dont_adjoin_frprep_exp")

    $stderr.puts "Rosy evaluating step arglab"
    @step = "arglab"
    perform_aux("dont_adjoin_frprep_exp")
  end

  ###############
  private

  # perform_aux: do the actual work of the perform() method
  # moved here because of the possibility of having @step=="both",
  # which makes it necessary to perform two eval steps one after the other
  def perform_aux(dont_adjoin_frprep_exp = nil) # string passed on to RosyEval initialize method
    # construct names for evaluation output file
    # and evaluation log file (which classifies each instances as correct/incorrect/unassigned)
    outfilename_id = @splitID ? "split" + @splitID : "test" + @testID

    @outfilename = evaluation_path("eval_file", outfilename_id)
    # the log file is only written if the experiment file asks for it
    @logfilename = @exp.get("print_eval_log") ? evaluation_path("log_file", outfilename_id) : nil

    @eval_obj = RosyEval.new(@exp, @ttt_obj, @step, @splitID, @testID,
                             @outfilename, @logfilename,
                             dont_adjoin_frprep_exp)
    @eval_obj.compute()
  end

  # Instantiate the experiment-file pattern +file_pattern+ for the current
  # experiment ID, the given test ID and the current step, and combine it
  # with the instantiated "rosy_dir" pattern via File.new_filename.
  def evaluation_path(file_pattern, # string: name of the pattern, e.g. "eval_file"
                      test_id)      # string: value for the "test_ID" slot
    directory = @exp.instantiate("rosy_dir",
                                 "exp_ID" => @exp.get("experiment_ID"))
    filename = @exp.instantiate(file_pattern,
                                "exp_ID" => @exp.get("experiment_ID"),
                                "test_ID" => test_id,
                                "step" => @step)
    File.new_filename(directory, filename)
  end
end