shalmaneser 1.2.0.rc4 → 1.2.rc5

Sign up to get free protection for your applications and to get access to all the features.
Files changed (115) hide show
  1. checksums.yaml +4 -4
  2. data/README.md +47 -18
  3. data/bin/shalmaneser +8 -2
  4. data/doc/index.md +1 -0
  5. data/lib/shalmaneser/opt_parser.rb +68 -67
  6. metadata +49 -119
  7. data/bin/fred +0 -16
  8. data/bin/frprep +0 -34
  9. data/bin/rosy +0 -17
  10. data/lib/common/AbstractSynInterface.rb +0 -1229
  11. data/lib/common/Counter.rb +0 -18
  12. data/lib/common/EnduserMode.rb +0 -27
  13. data/lib/common/Eval.rb +0 -480
  14. data/lib/common/FixSynSemMapping.rb +0 -196
  15. data/lib/common/Graph.rb +0 -345
  16. data/lib/common/ISO-8859-1.rb +0 -24
  17. data/lib/common/ML.rb +0 -186
  18. data/lib/common/Mallet.rb +0 -236
  19. data/lib/common/Maxent.rb +0 -229
  20. data/lib/common/Optimise.rb +0 -195
  21. data/lib/common/Parser.rb +0 -213
  22. data/lib/common/RegXML.rb +0 -269
  23. data/lib/common/RosyConventions.rb +0 -171
  24. data/lib/common/STXmlTerminalOrder.rb +0 -194
  25. data/lib/common/SalsaTigerRegXML.rb +0 -2347
  26. data/lib/common/SalsaTigerXMLHelper.rb +0 -99
  27. data/lib/common/SynInterfaces.rb +0 -282
  28. data/lib/common/TabFormat.rb +0 -721
  29. data/lib/common/Tiger.rb +0 -1448
  30. data/lib/common/Timbl.rb +0 -144
  31. data/lib/common/Tree.rb +0 -61
  32. data/lib/common/config_data.rb +0 -470
  33. data/lib/common/config_format_element.rb +0 -220
  34. data/lib/common/headz.rb +0 -338
  35. data/lib/common/option_parser.rb +0 -13
  36. data/lib/common/prep_config_data.rb +0 -62
  37. data/lib/common/prep_helper.rb +0 -1330
  38. data/lib/common/ruby_class_extensions.rb +0 -310
  39. data/lib/db/db_interface.rb +0 -48
  40. data/lib/db/db_mysql.rb +0 -145
  41. data/lib/db/db_sqlite.rb +0 -280
  42. data/lib/db/db_table.rb +0 -239
  43. data/lib/db/db_wrapper.rb +0 -176
  44. data/lib/db/sql_query.rb +0 -243
  45. data/lib/ext/maxent/Classify.class +0 -0
  46. data/lib/ext/maxent/Train.class +0 -0
  47. data/lib/fred/Baseline.rb +0 -150
  48. data/lib/fred/FileZipped.rb +0 -31
  49. data/lib/fred/FredBOWContext.rb +0 -877
  50. data/lib/fred/FredConventions.rb +0 -232
  51. data/lib/fred/FredDetermineTargets.rb +0 -319
  52. data/lib/fred/FredEval.rb +0 -312
  53. data/lib/fred/FredFeatureExtractors.rb +0 -322
  54. data/lib/fred/FredFeatures.rb +0 -1061
  55. data/lib/fred/FredFeaturize.rb +0 -602
  56. data/lib/fred/FredNumTrainingSenses.rb +0 -27
  57. data/lib/fred/FredParameters.rb +0 -402
  58. data/lib/fred/FredSplit.rb +0 -84
  59. data/lib/fred/FredSplitPkg.rb +0 -180
  60. data/lib/fred/FredTest.rb +0 -606
  61. data/lib/fred/FredTrain.rb +0 -144
  62. data/lib/fred/PlotAndREval.rb +0 -480
  63. data/lib/fred/fred.rb +0 -47
  64. data/lib/fred/fred_config_data.rb +0 -185
  65. data/lib/fred/md5.rb +0 -23
  66. data/lib/fred/opt_parser.rb +0 -250
  67. data/lib/frprep/Ampersand.rb +0 -39
  68. data/lib/frprep/CollinsInterface.rb +0 -1165
  69. data/lib/frprep/Counter.rb +0 -18
  70. data/lib/frprep/FNCorpusXML.rb +0 -643
  71. data/lib/frprep/FNDatabase.rb +0 -144
  72. data/lib/frprep/FrameXML.rb +0 -513
  73. data/lib/frprep/Graph.rb +0 -345
  74. data/lib/frprep/MiniparInterface.rb +0 -1388
  75. data/lib/frprep/RegXML.rb +0 -269
  76. data/lib/frprep/STXmlTerminalOrder.rb +0 -194
  77. data/lib/frprep/SleepyInterface.rb +0 -384
  78. data/lib/frprep/TntInterface.rb +0 -44
  79. data/lib/frprep/TreetaggerInterface.rb +0 -327
  80. data/lib/frprep/do_parses.rb +0 -143
  81. data/lib/frprep/frprep.rb +0 -693
  82. data/lib/frprep/interfaces/berkeley_interface.rb +0 -372
  83. data/lib/frprep/interfaces/stanford_interface.rb +0 -353
  84. data/lib/frprep/interpreters/berkeley_interpreter.rb +0 -22
  85. data/lib/frprep/interpreters/stanford_interpreter.rb +0 -22
  86. data/lib/frprep/one_parsed_file.rb +0 -28
  87. data/lib/frprep/opt_parser.rb +0 -94
  88. data/lib/frprep/ruby_class_extensions.rb +0 -310
  89. data/lib/rosy/AbstractFeatureAndExternal.rb +0 -242
  90. data/lib/rosy/ExternalConfigData.rb +0 -58
  91. data/lib/rosy/FailedParses.rb +0 -130
  92. data/lib/rosy/FeatureInfo.rb +0 -242
  93. data/lib/rosy/GfInduce.rb +0 -1115
  94. data/lib/rosy/GfInduceFeature.rb +0 -148
  95. data/lib/rosy/InputData.rb +0 -294
  96. data/lib/rosy/RosyConfusability.rb +0 -338
  97. data/lib/rosy/RosyEval.rb +0 -465
  98. data/lib/rosy/RosyFeatureExtractors.rb +0 -1609
  99. data/lib/rosy/RosyFeaturize.rb +0 -281
  100. data/lib/rosy/RosyInspect.rb +0 -336
  101. data/lib/rosy/RosyIterator.rb +0 -478
  102. data/lib/rosy/RosyPhase2FeatureExtractors.rb +0 -230
  103. data/lib/rosy/RosyPruning.rb +0 -165
  104. data/lib/rosy/RosyServices.rb +0 -744
  105. data/lib/rosy/RosySplit.rb +0 -232
  106. data/lib/rosy/RosyTask.rb +0 -19
  107. data/lib/rosy/RosyTest.rb +0 -829
  108. data/lib/rosy/RosyTrain.rb +0 -234
  109. data/lib/rosy/RosyTrainingTestTable.rb +0 -787
  110. data/lib/rosy/TargetsMostFrequentFrame.rb +0 -60
  111. data/lib/rosy/View.rb +0 -418
  112. data/lib/rosy/opt_parser.rb +0 -379
  113. data/lib/rosy/rosy.rb +0 -78
  114. data/lib/rosy/rosy_config_data.rb +0 -121
  115. data/lib/shalmaneser/version.rb +0 -3
@@ -1,60 +0,0 @@
1
- module TargetsMostFrequentSc
2
- def determine_target_most_frequent_sc(view,
3
- noval,
4
- with_frame_default = nil)
5
- target_subcat = Hash.new()
6
- frame_subcat = Hash.new()
7
-
8
- view.each_sentence { |sentence|
9
-
10
- target = tmf_target_key(sentence.first)
11
- frame = sentence.first["frame"]
12
- subcat = []
13
- # each instance: count individual Gf
14
- # add to sentencewide string
15
- sentence.each { |inst|
16
- if inst["fn_gf"] != noval and inst["fn_gf"] != "target"
17
- subcat << inst["fn_gf"]
18
- end
19
- } # each instance of sentence
20
-
21
- subcat.sort!
22
- subcat.uniq!
23
-
24
- # count sentwise GF for target
25
- if target_subcat[target].nil?
26
- target_subcat[target] = Hash.new(0)
27
- end
28
- target_subcat[target][subcat.join("_")] += 1
29
-
30
- # count same for frame
31
- if frame_subcat[frame].nil?
32
- frame_subcat[frame] = Hash.new(0)
33
- end
34
- frame_subcat[frame][subcat.join("_")] += 1
35
- } # each sentence of view
36
-
37
- # most frequent subcat for each target:
38
- retv = Hash.new()
39
- retv2 = Hash.new()
40
- [[retv, target_subcat], [retv2, frame_subcat]].each { |out_hash, in_hash|
41
-
42
- in_hash.each_pair { |key, subcats|
43
- most_frequent_subcat = subcats.to_a.max { |a,b| a.last <=> b.last }.first
44
- out_hash[key] = most_frequent_subcat
45
- }
46
- }
47
-
48
- if with_frame_default
49
- return [retv, retv2]
50
- else
51
- return retv
52
- end
53
- end
54
-
55
- def tmf_target_key(instance)
56
- return instance["frame"] + "." +
57
- instance["target"] + "." +
58
- instance["target_pos"]
59
- end
60
- end
@@ -1,418 +0,0 @@
1
- # class DBView
2
- # KE, SP 27.1.05
3
- #
4
- # builds on class DBTable, which offers access to a database table
5
- # extract views of the table (select columns, select rows)
6
- # and offers access methods for these views.
7
- # Rows of the table can be returned either as hashes or as arrays.
8
- #
9
- # There is a special column of the table (the name of which we get in the new() method),
10
- # the gold column.
11
- # It can be returned directly, or modified by some "dynamic feature object",
12
- # and its value (modified or unmodified) will always be last in the array representation of a row.
13
-
14
- require 'db/sql_query'
15
- require "common/ruby_class_extensions"
16
- require "common/RosyConventions"
17
-
18
- class DBView
19
-
20
- ################
21
- # new
22
- #
23
- # prepare a view.
24
- # given a list of DB tables to access, each with its
25
- # set of features to be returned in the view,
26
- # a set of value restrictions,
27
- # the name of the gold feature,
28
- # and a list of objects that manipulate the gold feature into alternate
29
- # gold features.
30
- #
31
- # value_restrictions restricts the view to those rows for which the value restrictions hold,
32
- # e.g. only those rows where frame = Bla, or only those rows where partofspeech = Blupp
33
- #
34
- # The view remembers the indices of the _first_ table in the list of tables
35
- # it is given.
36
- #
37
- # A standard dynamic ID can be given: DynGold objects all have an id() method,
38
- # which returns a string, by which the use of the object can be requested
39
- # of the view. If no dynamic ID is given below in methods each_array,
40
- # each_hash, each_sentence, the system falls back to the standard dynamic ID.
41
- # if none is given here, the standard DynGold object is the one that doesn't
42
- # change the gold column. If one is given here, it will be used by default
43
- # when no ID is given in each_hash, each_array, each_sentence
44
- #
45
- # The last parameter is a hash with the following optional entries:
46
- # "gold":
47
- # string: name of the gold feature
48
- # If you want the gold feature to be mapped using a DynGold object,
49
- # you need to specify this parameter -- and you need to include
50
- # the gold feature in some feature_list.
51
- # Warning: if a feature of this name appears in several of the
52
- # feature lists, only the first one is mapped
53
- # "dynamic_feature_list":
54
- # array:DynGold objects, list of objects that map the gold feature
55
- # to a different feature value (e.g. to "FE", "NONE")
56
- # DynGold objects have one method make: string -> string
57
- # that maps one gold feature,
58
- # and one method id: -> string that gives an ID unique to this DynGold class
59
- # and by which this DynGold class can be chosen.
60
- # "standard_dyngold_id":
61
- # string: standard DynGold object ID (see above)
62
- # "sentence_id_feature":
63
- # string: feature name for the sentence ID column, needed for each_sentence()
64
- #
65
- # further parameters that are passed on to SQLQuery.select: see there
66
-
67
- def initialize(table_col_pairs, # array:SelectTableAndColumns objects
68
- value_restrictions, # array:ValueRestriction objects
69
- db_obj, # MySql object (from mysql.rb) that already has access to the correct database
70
- parameters = {}) # hash with further parameters: see above
71
-
72
- @db_obj = db_obj
73
- @table_col_pairs = table_col_pairs
74
- @parameters = parameters
75
-
76
- # view empty?
77
- if @table_col_pairs.empty? or
78
- @table_col_pairs.big_and { |tc| tc.columns.class.to_s == "Array" and tc.columns.empty? }
79
- @view_empty = true
80
- return
81
- else
82
- @view_empty = false
83
- end
84
-
85
- # okay, we can make the view, it contains at least one table and
86
- # at least one column:
87
- # do one view for all columns requested, and one for the indices of each table
88
- #
89
- # @main_table is a DBResult object
90
- @main_table = execute_command(SQLQuery.select(@table_col_pairs,
91
- value_restrictions, parameters))
92
-
93
- # index_tables: Hash: table name => DBResult object
94
- @index_tables = Hash.new
95
- table_col_pairs.each_with_index { |tc, index|
96
- # read index column of this table, add all the other tables
97
- # with empty column lists
98
- index_table_col_pairs = @table_col_pairs.map_with_index { |other_tc, other_index|
99
- if other_index == index
100
- # the current table
101
- SelectTableAndColumns.new(tc.table_obj,
102
- [tc.table_obj.index_name])
103
- else
104
- # other table: keep just the table, not the columns
105
- SelectTableAndColumns.new(other_tc.table_obj, nil)
106
- end
107
- }
108
- @index_tables[tc.table_obj.table_name] = execute_command(SQLQuery.select(index_table_col_pairs,
109
- value_restrictions, parameters))
110
- }
111
-
112
- # map gold to something else?
113
- # yes, if parameters[gold] has been set
114
- if @parameters["gold"]
115
- @map_gold = true
116
- # remember which column in the DB table is the gold column
117
- @gold_index = column_names().index(@parameters["gold"])
118
- else
119
- @map_gold = false
120
- end
121
- end
122
-
123
- ################
124
- # close
125
- #
126
- # to be called when the view is no longer needed:
127
- # frees the DBResult objects underlying this view
128
- def close()
129
- unless @view_empty
130
- @main_table.free()
131
- @index_tables.each_value { |t| t.free() }
132
- end
133
- end
134
-
135
- ################
136
- # write_to_file
137
- #
138
- # writes instances to a file
139
- # each instance given as a comma-separated list of features
140
- # The features are the ones given in my_feature_list
141
- # (parameter to the new() method) above, in that order,
142
- # plus (dynamic) gold, which is last.
143
- #
144
- # guarantees that comma is used only to separate features -- but no other
145
- # changes in the feature values
146
- def write_to_file(file, # stream to write to
147
- dyn_gold_id=nil) #string: ID of a DynGold object from the dynamic_feature_list.
148
- # if nil, main gold is used
149
-
150
- each_instance_s(dyn_gold_id) { |instance_string|
151
- file.puts instance_string
152
- }
153
- end
154
-
155
-
156
- ################
157
- # each_instance_s
158
- #
159
- # yields each instance as a string:
160
- # a comma-separated list of features
161
- # The features are the ones given in my_feature_list
162
- # (parameter to the new() method) above, in that order,
163
- # plus (dynamic) gold, which is last.
164
- #
165
- # guarantees that comma is used only to separate features -- but no other
166
- # changes in the feature values
167
- def each_instance_s(dyn_gold_id=nil) #string: ID of a DynGold object from the dynamic_feature_list.
168
- # if nil, main gold is used
169
- each_array(dyn_gold_id) {|array|
170
- yield array.map { |entry| entry.to_s.gsub(/,/, "COMMA") }.join(",")
171
- }
172
- end
173
-
174
- ################
175
- # each_hash
176
- #
177
- # iterates over hashes representing rows
178
- # in each row, there is a gold key/value pair
179
- # specified by the optional argument dyn_gold_id.
180
- # which is the string ID of a DynGold object
181
- # from the dynamic_feature_list.
182
- # If arg is not present, main gold is used
183
- #
184
- # The key for the gold is the dyn_gold_id
185
- # If that is nil, the key is 'gold'
186
- #
187
- # yields: hashes column_name -> column_value
188
- def each_hash(dyn_gold_id=nil) #string: ID of a DynGold object from the dynamic_feature_list, or nil
189
- if @view_empty
190
- return
191
- end
192
- if @map_gold
193
- dyn_gold_obj = fetch_dyn_gold_obj(dyn_gold_id)
194
- end
195
- @main_table.reset()
196
-
197
- @main_table.each_hash { |row_hash|
198
- if @map_gold
199
- row_hash[@parameters["gold"]] = dyn_gold_obj.make(row_hash[@parameters["gold"]])
200
- end
201
-
202
- yield row_hash
203
- }
204
- end
205
-
206
- ################
207
- # each_array
208
- #
209
- # iterates over arrays representing rows
210
- # the last item of each row is the gold column
211
- # selected by the optional argument dyn_gold_id.
212
- # which is the string ID of a DynGold object
213
- # from the dynamic_feature_list.
214
- # If arg is not present, main gold is used
215
- #
216
- # yields: arrays of column values,
217
- # values are in the order of my_feature_list given
218
- # to the new() method, (dynamic) gold is last
219
- def each_array(dyn_gold_id=nil) #string: ID of a DynGold object from the dynamic_feature_list, or nil
220
-
221
- if @view_empty
222
- return
223
- end
224
- if @map_gold
225
- dyn_gold_obj = fetch_dyn_gold_obj(dyn_gold_id)
226
- end
227
- @main_table.reset()
228
-
229
- @main_table.each {|row|
230
- if @gold_index
231
- gold = row.delete_at(@gold_index)
232
- if @map_gold
233
- row.push dyn_gold_obj.make(gold)
234
- else
235
- row.push gold
236
- end
237
- end
238
-
239
- yield row
240
- }
241
- end
242
-
243
- ################
244
- # update_column
245
- #
246
- # update a column for all rows of this view
247
- #
248
- # Given a column name to be updated, and a list of value tuples,
249
- # update each row of the view, or rather the appropriate column of each row of the view,
250
- # with values for that row.
251
- #
252
- # the list has the same length as the view, as there must be a value tuple
253
- # for each row of the view.
254
- #
255
- # returns: nothing
256
- def update_column(name, # string: column name
257
- values) # array of Objects
258
-
259
- if @view_empty
260
- raise "Cannot update empty view"
261
- end
262
-
263
- # find the first table in @table_col_pairs that has
264
- # a column with this name
265
- # and update that column
266
- @table_col_pairs.each { |tc|
267
- if (tc.columns.class.to_s == "Array" and tc.columns.include? name) or
268
- (tc.columns == "*" and tc.table_obj.list_column_names().include? name)
269
-
270
- table_name = tc.table_obj.table_name
271
-
272
- # sanity check: number of update entries must match
273
- # number of entries in this view
274
- unless values.length() == @index_tables[table_name].num_rows()
275
- $stderr.puts "Error: length of value array (#{values.length}) is not equal to length of view (#{@index_tables[table_name].num_rows})!"
276
- exit 1
277
- end
278
-
279
- @index_tables[tc.table_obj.table_name].reset()
280
-
281
- values.each { |value|
282
- index = @index_tables[table_name].fetch_row().first
283
- tc.table_obj.update_row(index, [[name, value]])
284
- }
285
-
286
- return
287
- end
288
- }
289
-
290
- # no match found
291
- $stderr.puts "View.rb Error: cannot update a column that is not in this view: #{name}"
292
- exit 1
293
- end
294
-
295
-
296
- ################
297
- # each_sentence
298
- #
299
- # like each_hash, but it groups the row hashes sentence-wise
300
- # sentence boundaries in the view are detected by the change in a
301
- # special column describing sentence IDs
302
- #
303
- # also needs a dyngold object id
304
- #
305
- # returns: an array of hashes column_name -> column_value
306
- def each_sentence(dyn_gold_id = nil) # string: ID of a DynGold object from the dynamic_feature_list, or nil
307
-
308
- # sanity check 1: need to know what the sentence ID is
309
- unless @parameters["sentence_id_feature"]
310
- raise "I need the name of the sentence ID feature for each_sentence()"
311
- end
312
- # sanity check 2: the view needs to include the sentence ID
313
- unless column_names().include? @parameters["sentence_id_feature"]
314
- raise "View.each_sentence: Cannot do this without sentence ID in the view"
315
- end
316
-
317
- last_sent_id = nil
318
- sentence = Array.new
319
- each_hash(dyn_gold_id) {|row_hash|
320
- if last_sent_id != row_hash[@parameters["sentence_id_feature"]] and
321
- (!(last_sent_id.nil?))
322
- yield sentence
323
- sentence = Array.new
324
- end
325
- last_sent_id = row_hash[@parameters["sentence_id_feature"]]
326
- sentence << row_hash
327
- }
328
- unless sentence.empty?
329
- yield sentence
330
- end
331
- end
332
-
333
- ######################
334
- # length
335
- #
336
- # returns the length of the view: the number of its rows
337
- def length()
338
- return @index_tables[@table_col_pairs.first.table_obj.table_name].num_rows
339
- end
340
-
341
- ###
342
- private
343
-
344
- ################
345
- # column_names
346
- #
347
- # returns: array:string
348
- # the list of column names for this view
349
- # in the right order
350
- def column_names()
351
- if @view_empty
352
- return []
353
- else
354
- return @main_table.list_column_names()
355
- end
356
- end
357
-
358
- ######
359
- # fetch_dyn_gold_obj
360
- #
361
- # given an ID of a gold object, look for the DynGold object
362
- # with this ID in the dynamic_feature_list and return it
363
- # If the ID is nil, use the standard dynamic gold ID that
364
- # has been set in the new() method.
365
- # If that is nil too, take the non-modified gold as a
366
- # default: return a dummy object with a make() method
367
- # that just returns its parameter.
368
- #
369
- # returns: object offering a make() method
370
-
371
- def fetch_dyn_gold_obj(dyn_gold_id) # string or nil
372
- # find a DynGold object that will transform the gold column
373
- if dyn_gold_id.nil?
374
- dyn_gold_id = @parameters["standard_dyngold_id"]
375
- end
376
-
377
- dyn_gold_obj = "we need an object that can do 'make'"
378
- if dyn_gold_id
379
- unless @parameters["dynamic_feature_list"]
380
- raise "No dynamic features given"
381
- end
382
-
383
- dyn_gold_obj = @parameters["dynamic_feature_list"].detect { |obj|
384
- obj.id() == dyn_gold_id
385
- }
386
- if dyn_gold_obj.nil?
387
- $stderr.puts "View.rb: Unknown DynGold ID " + dyn_gold_id
388
- $stderr.puts "Using unchanged gold"
389
- dyn_gold_id = nil
390
- end
391
- end
392
-
393
- unless dyn_gold_id
394
- # no dynamic gold ID: use unchanged gold by default
395
- class << dyn_gold_obj
396
- def make(x)
397
- x
398
- end
399
- def id()
400
- return "gold"
401
- end
402
- end
403
- end
404
- return dyn_gold_obj
405
- end
406
-
407
- def execute_command(command)
408
- begin
409
- return @db_obj.query(command)
410
- rescue MysqlError => e
411
- $stderr.puts "Error executing SQL query. Command was:\n" + command
412
- $stderr.puts "Error code: #{e.errno}"
413
- $stderr.puts "Error message: #{e.error}"
414
- raise e
415
- end
416
- end
417
-
418
- end