shalmaneser-rosy 1.2.0.rc4

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,60 @@
1
# Mixin that finds, for every target and every frame occurring in a view,
# the subcategorization pattern that is seen most often.
#
# A subcategorization pattern is the sorted, duplicate-free list of the
# "fn_gf" values of one sentence, joined by "_".
module TargetsMostFrequentSc
  ###
  # determine_target_most_frequent_sc
  #
  # view: object offering each_sentence(), which yields one sentence at a
  #       time as an array of row hashes. Each row hash is read at the keys
  #       "frame", "target", "target_pos" and "fn_gf".
  # noval: the "fn_gf" value that stands for "no grammatical function";
  #        rows carrying it (or the literal "target") do not contribute
  #        to the pattern.
  # with_frame_default: if true-ish, the per-frame result is returned too.
  #
  # returns: hash target key (see tmf_target_key) -> most frequent pattern;
  #          if with_frame_default is set, a pair
  #          [that hash, hash frame -> most frequent pattern].
  #          Among equally frequent patterns the one counted first wins.
  def determine_target_most_frequent_sc(view,
                                        noval,
                                        with_frame_default = nil)
    # outer key -> (pattern -> number of sentences showing that pattern)
    target_subcat = Hash.new { |hash, key| hash[key] = Hash.new(0) }
    frame_subcat = Hash.new { |hash, key| hash[key] = Hash.new(0) }

    view.each_sentence do |sentence|
      # a sentence without rows has neither target nor frame: nothing to count
      next if sentence.empty?

      # target and frame are read off the first row of the sentence
      head = sentence.first
      target = tmf_target_key(head)
      frame = head["frame"]

      # collect the grammatical functions of the sentence,
      # leaving out the target itself and rows without a value
      gfs = sentence.map { |inst| inst["fn_gf"] }
      gfs = gfs.reject { |gf| gf == noval || gf == "target" }
      pattern = gfs.sort.uniq.join("_")

      # count the sentence-wide pattern once for the target, once for the frame
      target_subcat[target][pattern] += 1
      frame_subcat[frame][pattern] += 1
    end

    # reduce the per-key pattern counts to the single most frequent pattern
    pick_most_frequent = lambda do |counts_by_key|
      winners = {}
      counts_by_key.each_pair do |key, counts|
        winners[key] = counts.max_by { |_pattern, count| count }.first
      end
      winners
    end

    by_target = pick_most_frequent.call(target_subcat)
    by_frame = pick_most_frequent.call(frame_subcat)

    with_frame_default ? [by_target, by_frame] : by_target
  end

  ###
  # tmf_target_key
  #
  # instance: row hash with string values at "frame", "target", "target_pos"
  #
  # returns: string "<frame>.<target>.<target_pos>", the key under which
  #          determine_target_most_frequent_sc files a target.
  #          Plain string concatenation is used on purpose, so a missing
  #          value raises instead of producing a malformed key.
  def tmf_target_key(instance)
    instance["frame"] + "." +
      instance["target"] + "." +
      instance["target_pos"]
  end
end
@@ -0,0 +1,418 @@
1
# class DBView
# KE, SP 27.1.05
#
# Builds on class DBTable, which offers access to a database table.
# DBView extracts views of such tables (selected columns, selected rows)
# and offers access methods for these views.
# Rows of the table can be returned either as hashes or as arrays.
#
# One column of the table is special: the gold column, whose name is
# passed to the new() method.
# It can be returned directly, or modified by some "dynamic feature object".
# Its value (modified or unmodified) is always the last entry in the
# array representation of a row.
13
+
14
+ require 'db/sql_query'
15
+ require "common/ruby_class_extensions"
16
+ require "common/RosyConventions"
17
+
18
class DBView

  ################
  # new
  #
  # prepare a view.
  # given a list of DB tables to access, each with its
  # set of features to be returned in the view,
  # a set of value restrictions,
  # the name of the gold feature,
  # and a list of objects that manipulate the gold feature into alternate
  # gold features.
  #
  # value_restrictions restricts the view to those rows for which the value
  # restrictions hold, e.g. only those rows where frame = Bla, or only those
  # rows where partofspeech = Blupp
  #
  # The view keeps one index result per table it is given; length() reports
  # the row count of the _first_ table in the list.
  #
  # A standard dynamic ID can be given: DynGold objects all have an id() method,
  # which returns a string, by which the use of the object can be requested
  # of the view. If no dynamic ID is given below in methods each_array,
  # each_hash, each_sentence, the system falls back to the standard dynamic ID.
  # If none is given here, the standard DynGold object is the one that doesn't
  # change the gold column. If one is given here, it will be used by default
  # when no ID is given in each_hash, each_array, each_sentence
  #
  # The last parameter is a hash with the following optional entries:
  # "gold":
  #   string: name of the gold feature
  #   If you want the gold feature to be mapped using a DynGold object,
  #   you need to specify this parameter -- and you need to include
  #   the gold feature in some feature_list.
  #   Warning: if a feature of this name appears in several of the
  #   feature lists, only the first one is mapped
  # "dynamic_feature_list":
  #   array:DynGold objects, list of objects that map the gold feature
  #   to a different feature value (e.g. to "FE", "NONE")
  #   DynGold objects have one method make: string -> string
  #   that maps one gold feature,
  #   and one method id: -> string that gives an ID unique to this DynGold class
  #   and by which this DynGold class can be chosen.
  # "standard_dyngold_id":
  #   string: standard DynGold object ID (see above)
  # "sentence_id_feature":
  #   string: feature name for the sentence ID column, needed for each_sentence()
  #
  # further parameters that are passed on to SQLQuery.select: see there
  #
  # raises: whatever execute_command re-raises when a query fails
  def initialize(table_col_pairs,    # array:SelectTableAndColumns objects
                 value_restrictions, # array:ValueRestriction objects
                 db_obj,             # MySql object (from mysql.rb) that already has access to the correct database
                 parameters = {})    # hash with further parameters: see above

    @db_obj = db_obj
    @table_col_pairs = table_col_pairs
    @parameters = parameters
    @map_gold = false
    @gold_index = nil

    # view empty? That is the case if there is no table at all,
    # or if every table comes with an empty array of columns.
    @view_empty = @table_col_pairs.empty? ||
                  @table_col_pairs.big_and { |tc| tc.columns.class.to_s == "Array" && tc.columns.empty? }
    # nothing to query for an empty view; every accessor checks @view_empty
    return if @view_empty

    # okay, we can make the view, it contains at least one table and
    # at least one column:
    # do one view for all columns requested, and one for the indices of each table
    #
    # @main_table is a DBResult object
    @main_table = execute_command(SQLQuery.select(@table_col_pairs,
                                                  value_restrictions, parameters))

    # index_tables: Hash: table name => DBResult object
    @index_tables = {}
    table_col_pairs.each_with_index do |tc, index|
      # read index column of this table, add all the other tables
      # with empty column lists
      index_table_col_pairs = @table_col_pairs.map_with_index { |other_tc, other_index|
        if other_index == index
          # the current table
          SelectTableAndColumns.new(tc.table_obj, [tc.table_obj.index_name])
        else
          # other table: keep just the table, not the columns
          SelectTableAndColumns.new(other_tc.table_obj, nil)
        end
      }
      @index_tables[tc.table_obj.table_name] =
        execute_command(SQLQuery.select(index_table_col_pairs,
                                        value_restrictions, parameters))
    end

    # map gold to something else?
    # yes, if parameters[gold] has been set
    if @parameters["gold"]
      @map_gold = true
      # remember which column in the DB table is the gold column
      # (nil if the view does not contain a column of that name)
      @gold_index = column_names.index(@parameters["gold"])
    end
  end

  ################
  # close
  #
  # to be called when the view is no longer needed:
  # frees the DBResult objects underlying this view.
  # An empty view holds no DBResult objects, so nothing is done for it.
  def close
    return if @view_empty

    @main_table.free
    @index_tables.each_value { |table| table.free }
  end

  ################
  # write_to_file
  #
  # writes instances to a file,
  # each instance given as a comma-separated list of features
  # (one line per instance, as produced by each_instance_s).
  #
  # guarantees that comma is used only to separate features -- but no other
  # changes in the feature values
  def write_to_file(file,             # stream to write to
                    dyn_gold_id = nil) # string: ID of a DynGold object from the dynamic_feature_list.
                                       # if nil, main gold is used
    each_instance_s(dyn_gold_id) do |instance_string|
      file.puts instance_string
    end
  end

  ################
  # each_instance_s
  #
  # yields each instance as a string:
  # a comma-separated list of the values that each_array yields for a row,
  # in that order (so (dynamic) gold is last whenever each_array puts it last).
  #
  # guarantees that comma is used only to separate features: a comma inside
  # a value is replaced by the string "COMMA" -- but no other
  # changes are made to the feature values
  def each_instance_s(dyn_gold_id = nil) # string: ID of a DynGold object from the dynamic_feature_list.
                                         # if nil, main gold is used
    each_array(dyn_gold_id) do |array|
      yield array.map { |entry| entry.to_s.gsub(/,/, "COMMA") }.join(",")
    end
  end

  ################
  # each_hash
  #
  # iterates over hashes representing rows.
  # If a gold feature name was given to new(), the value stored under that
  # name in each row is passed through the make() method of the DynGold
  # object selected by the optional argument dyn_gold_id
  # (string ID of a DynGold object from the dynamic_feature_list).
  # If the argument is not present, the standard DynGold object is used,
  # and if there is none, gold stays unchanged.
  #
  # The key of the gold value is always the name of the gold feature,
  # whichever DynGold object is used.
  #
  # yields: hashes column_name -> column_value
  def each_hash(dyn_gold_id = nil) # string: ID of a DynGold object from the dynamic_feature_list, or nil
    return if @view_empty

    dyn_gold_obj = fetch_dyn_gold_obj(dyn_gold_id) if @map_gold
    gold_name = @parameters["gold"]

    # start reading from the first row again
    @main_table.reset

    @main_table.each_hash do |row_hash|
      row_hash[gold_name] = dyn_gold_obj.make(row_hash[gold_name]) if @map_gold

      yield row_hash
    end
  end

  ################
  # each_array
  #
  # iterates over arrays representing rows.
  # If a gold feature name was given to new() and the view contains a
  # column of that name, the gold value is taken out of its position and
  # appended as the last item of the row, after mapping it with the DynGold
  # object selected by the optional argument dyn_gold_id
  # (string ID of a DynGold object from the dynamic_feature_list).
  # If the argument is not present, the standard DynGold object is used,
  # and if there is none, gold stays unchanged.
  #
  # yields: arrays of column values, in the order of the view's columns,
  #         with (dynamic) gold moved to the end
  def each_array(dyn_gold_id = nil) # string: ID of a DynGold object from the dynamic_feature_list, or nil
    return if @view_empty

    dyn_gold_obj = fetch_dyn_gold_obj(dyn_gold_id) if @map_gold

    # start reading from the first row again
    @main_table.reset

    @main_table.each do |row|
      if @gold_index
        gold = row.delete_at(@gold_index)
        row.push(@map_gold ? dyn_gold_obj.make(gold) : gold)
      end

      yield row
    end
  end

  ################
  # update_column
  #
  # update a column for all rows of this view
  #
  # Given a column name to be updated, and a list of values,
  # update each row of the view, or rather the appropriate column of each
  # row of the view, with the value for that row.
  #
  # the list has the same length as the view, as there must be a value
  # for each row of the view.
  #
  # Only the first table of the view that has a column of this name
  # is updated.
  #
  # returns: nothing
  # raises: RuntimeError for an empty view.
  #         A length mismatch or an unknown column name is reported on
  #         stderr and ends the program with exit status 1.
  def update_column(name,   # string: column name
                    values) # array of Objects

    raise "Cannot update empty view" if @view_empty

    # find the first table in @table_col_pairs that has
    # a column with this name
    # and update that column
    @table_col_pairs.each do |tc|
      has_column = (tc.columns.class.to_s == "Array" && tc.columns.include?(name)) ||
                   (tc.columns == "*" && tc.table_obj.list_column_names.include?(name))
      next unless has_column

      table_name = tc.table_obj.table_name
      index_table = @index_tables[table_name]

      # sanity check: number of update entries must match
      # number of entries in this view
      unless values.length == index_table.num_rows
        $stderr.puts "Error: length of value array (#{values.length}) is not equal to length of view (#{index_table.num_rows})!"
        exit 1
      end

      # walk through the row indices of this table in view order,
      # pairing each one with the value at the same position
      index_table.reset

      values.each do |value|
        index = index_table.fetch_row.first
        tc.table_obj.update_row(index, [[name, value]])
      end

      return
    end

    # no match found
    $stderr.puts "View.rb Error: cannot update a column that is not in this view: #{name}"
    exit 1
  end

  ################
  # each_sentence
  #
  # like each_hash, but it groups the row hashes sentence-wise.
  # sentence boundaries in the view are detected by the change in a
  # special column describing sentence IDs
  # (parameter "sentence_id_feature" of new()).
  #
  # also needs a dyngold object id
  #
  # yields: arrays of hashes column_name -> column_value,
  #         one array per run of consecutive rows with the same sentence ID
  # raises: RuntimeError if the sentence ID feature is not known
  #         or is not a column of this view
  def each_sentence(dyn_gold_id = nil) # string: ID of a DynGold object from the dynamic_feature_list, or nil
    sent_id_name = @parameters["sentence_id_feature"]

    # sanity check 1: need to know what the sentence ID is
    unless sent_id_name
      raise "I need the name of the sentence ID feature for each_sentence()"
    end
    # sanity check 2: the view needs to include the sentence ID
    unless column_names.include?(sent_id_name)
      raise "View.each_sentence: Cannot do this without sentence ID in the view"
    end

    last_sent_id = nil
    sentence = []
    each_hash(dyn_gold_id) do |row_hash|
      current_id = row_hash[sent_id_name]
      # the sentence ID changed: hand over the rows collected so far
      if !last_sent_id.nil? && last_sent_id != current_id
        yield sentence
        sentence = []
      end
      last_sent_id = current_id
      sentence << row_hash
    end

    # the last sentence has no following ID change to flush it
    yield sentence unless sentence.empty?
  end

  ######################
  # length
  #
  # returns the length of the view: the number of its rows,
  # read off the index result of the first table.
  # An empty view has no index results; its length is 0.
  def length
    return 0 if @view_empty

    @index_tables[@table_col_pairs.first.table_obj.table_name].num_rows
  end

  ###
  private

  ################
  # column_names
  #
  # returns: array:string
  # the list of column names for this view
  # in the right order; empty for an empty view
  def column_names
    return [] if @view_empty

    @main_table.list_column_names
  end

  ######
  # fetch_dyn_gold_obj
  #
  # given an ID of a gold object, look for the DynGold object
  # with this ID in the dynamic_feature_list and return it.
  # If the ID is nil, use the standard dynamic gold ID that
  # has been set in the new() method.
  # If that is nil too, or if no DynGold object has the requested ID
  # (which is reported on stderr), take the non-modified gold as a
  # default: return a dummy object with a make() method
  # that just returns its parameter.
  #
  # returns: object offering a make() method
  # raises: RuntimeError if an ID is requested but no
  #         dynamic_feature_list was given to new()
  def fetch_dyn_gold_obj(dyn_gold_id) # string or nil
    dyn_gold_id = @parameters["standard_dyngold_id"] if dyn_gold_id.nil?

    # no dynamic gold ID: use unchanged gold by default
    return unchanged_gold_obj unless dyn_gold_id

    raise "No dynamic features given" unless @parameters["dynamic_feature_list"]

    dyn_gold_obj = @parameters["dynamic_feature_list"].detect { |obj|
      obj.id == dyn_gold_id
    }
    return dyn_gold_obj unless dyn_gold_obj.nil?

    $stderr.puts "View.rb: Unknown DynGold ID #{dyn_gold_id}"
    $stderr.puts "Using unchanged gold"
    unchanged_gold_obj
  end

  ######
  # unchanged_gold_obj
  #
  # returns: a fresh object whose make() returns its argument unchanged
  #          and whose id() is "gold".
  # The methods are attached to a new Object each time: attaching them to
  # the result of a failed lookup would attach them to nil, i.e. to
  # NilClass, and so to every nil in the program.
  def unchanged_gold_obj
    identity = Object.new
    def identity.make(x)
      x
    end

    def identity.id
      "gold"
    end
    identity
  end

  ######
  # execute_command
  #
  # sends an SQL command to the database object.
  # On a MysqlError the command, the error code and the error message
  # are written to stderr, then the error is passed on to the caller.
  #
  # returns: the result of @db_obj.query
  def execute_command(command)
    @db_obj.query(command)
  rescue MysqlError => e
    $stderr.puts "Error executing SQL query. Command was:\n" + command
    $stderr.puts "Error code: #{e.errno}"
    $stderr.puts "Error message: #{e.error}"
    raise
  end

end