shalmaneser-lib 1.2.rc5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (70) hide show
  1. checksums.yaml +7 -0
  2. data/.yardopts +10 -0
  3. data/CHANGELOG.md +4 -0
  4. data/LICENSE.md +4 -0
  5. data/README.md +122 -0
  6. data/lib/configuration/config_data.rb +457 -0
  7. data/lib/configuration/config_format_element.rb +210 -0
  8. data/lib/configuration/configuration_error.rb +15 -0
  9. data/lib/configuration/external_config_data.rb +56 -0
  10. data/lib/configuration/frappe_config_data.rb +134 -0
  11. data/lib/configuration/fred_config_data.rb +199 -0
  12. data/lib/configuration/rosy_config_data.rb +126 -0
  13. data/lib/db/db_interface.rb +50 -0
  14. data/lib/db/db_mysql.rb +141 -0
  15. data/lib/db/db_sqlite.rb +280 -0
  16. data/lib/db/db_table.rb +237 -0
  17. data/lib/db/db_view.rb +416 -0
  18. data/lib/db/db_wrapper.rb +175 -0
  19. data/lib/db/select_table_and_columns.rb +10 -0
  20. data/lib/db/sql_query.rb +243 -0
  21. data/lib/definitions.rb +19 -0
  22. data/lib/eval.rb +482 -0
  23. data/lib/ext/maxent/Classify.class +0 -0
  24. data/lib/ext/maxent/Train.class +0 -0
  25. data/lib/external_systems.rb +251 -0
  26. data/lib/framenet_format/fn_corpus_aset.rb +209 -0
  27. data/lib/framenet_format/fn_corpus_xml_file.rb +120 -0
  28. data/lib/framenet_format/fn_corpus_xml_sentence.rb +299 -0
  29. data/lib/framenet_format/fn_database.rb +143 -0
  30. data/lib/framenet_format/frame_xml_file.rb +104 -0
  31. data/lib/framenet_format/frame_xml_sentence.rb +411 -0
  32. data/lib/logging.rb +25 -0
  33. data/lib/ml/classifier.rb +189 -0
  34. data/lib/ml/mallet.rb +236 -0
  35. data/lib/ml/maxent.rb +229 -0
  36. data/lib/ml/optimize.rb +195 -0
  37. data/lib/ml/timbl.rb +140 -0
  38. data/lib/monkey_patching/array.rb +82 -0
  39. data/lib/monkey_patching/enumerable_bool.rb +24 -0
  40. data/lib/monkey_patching/enumerable_distribute.rb +18 -0
  41. data/lib/monkey_patching/file.rb +131 -0
  42. data/lib/monkey_patching/subsumed.rb +24 -0
  43. data/lib/ruby_class_extensions.rb +4 -0
  44. data/lib/salsa_tiger_xml/corpus.rb +24 -0
  45. data/lib/salsa_tiger_xml/fe_node.rb +98 -0
  46. data/lib/salsa_tiger_xml/file_parts_parser.rb +214 -0
  47. data/lib/salsa_tiger_xml/frame_node.rb +145 -0
  48. data/lib/salsa_tiger_xml/graph_node.rb +347 -0
  49. data/lib/salsa_tiger_xml/reg_xml.rb +285 -0
  50. data/lib/salsa_tiger_xml/salsa_tiger_sentence.rb +596 -0
  51. data/lib/salsa_tiger_xml/salsa_tiger_sentence_graph.rb +333 -0
  52. data/lib/salsa_tiger_xml/salsa_tiger_sentence_sem.rb +438 -0
  53. data/lib/salsa_tiger_xml/salsa_tiger_xml_helper.rb +84 -0
  54. data/lib/salsa_tiger_xml/salsa_tiger_xml_node.rb +161 -0
  55. data/lib/salsa_tiger_xml/sem_node.rb +58 -0
  56. data/lib/salsa_tiger_xml/string_terminals_in_right_order.rb +192 -0
  57. data/lib/salsa_tiger_xml/syn_node.rb +169 -0
  58. data/lib/salsa_tiger_xml/tree_node.rb +59 -0
  59. data/lib/salsa_tiger_xml/ts_syn_node.rb +47 -0
  60. data/lib/salsa_tiger_xml/usp_node.rb +72 -0
  61. data/lib/salsa_tiger_xml/xml_node.rb +163 -0
  62. data/lib/shalmaneser/lib.rb +1 -0
  63. data/lib/tabular_format/fn_tab_format_file.rb +38 -0
  64. data/lib/tabular_format/fn_tab_frame.rb +67 -0
  65. data/lib/tabular_format/fn_tab_sentence.rb +169 -0
  66. data/lib/tabular_format/tab_format_file.rb +91 -0
  67. data/lib/tabular_format/tab_format_named_args.rb +184 -0
  68. data/lib/tabular_format/tab_format_sentence.rb +119 -0
  69. data/lib/value_restriction.rb +49 -0
  70. metadata +131 -0
@@ -0,0 +1,237 @@
1
+ # class DBTable
2
+ # KE, SP 27.1.05
3
+ #
4
+ # Manages one table in a (given) SQL database
5
+ # Doesn't know anything about the ROSY application
6
+ # Just creating a table, changing the table, and accessing it.
7
+ #
8
+
9
+ require 'db/sql_query'
10
+ # require "RosyConventions"
11
+
12
# Manages one table in a (given) SQL database: creating the table,
# changing its format, and inserting/updating rows.
# All database access goes through the wrapper object handed to new().
class DBTable
  attr_reader :index_name, :table_name

  #####
  # new
  #
  # Creates the table object.
  # The name of the table (given as parameter) can be new, in which case the
  # table is created, or old, in which case we check whether its column names
  # match the names given in the parameters.
  #
  # The table format is given in the form of column formats (column names and
  # column formats, formats are the usual SQLy things). Additionally, a subset
  # of the column names can be designated index columns, which means that the
  # table is indexed (and can be searched quickly) for them.
  #
  # DBTable internally uses a "primary index" column called "XXindexXX"
  # (autoincrement column); this name is reserved.
  #
  # Columns that are added later using change_format_add_columns must carry
  # the 'addcol_prefix'. Columns with that prefix are skipped when the column
  # names of an existing table are checked on opening; this can be used to
  # store experiment-specific data.
  #
  # Parameters:
  #   db_obj:     DBWrapper object
  #   table_name: string: name of DB table (existing/new)
  #   mode:       'new':  starts new DB table, removes old one if it exists
  #               'open': reopens existing DB table
  #   hash:       parameter name => parameter value, depending on mode
  #     mode 'new' needs exactly:
  #       'col_formats':   array of [column_name, column_format] string pairs
  #       'index_cols':    array:string: column names used to index the table
  #       'addcol_prefix': string: prefix for names of additional columns
  #     mode 'open' accepts:
  #       'col_names':     array:string: expected column names.
  #                        May be missing/nil, then the names are not checked
  #       'addcol_prefix': string: prefix for names of additional columns
  #
  # Raises RuntimeError if a sanity check fails or mode is unknown.
  def initialize(db_obj, table_name, mode, hash = {})
    @index_name = "XXindexXX"
    @db_obj = db_obj
    @table_name = table_name
    # remembered so that change_format_add_columns can enforce the prefix
    @addcol_prefix = hash['addcol_prefix']

    case mode
    when 'new'
      init_new_table(hash)
    when 'open'
      init_existing_table(hash)
    else
      raise "Parameter 'mode' needs to be either 'new' or 'open'! I got " + mode.to_s
    end
  end

  #####
  # list_column_names
  #
  # list column names of this table
  #
  # returns: array:string, list of column names
  def list_column_names
    @db_obj.list_column_names(@table_name)
  end

  #####
  # list_column_formats
  #
  # list column names and column types of this table
  #
  # returns: array:string*string, list of pairs [column name, column format]
  def list_column_formats
    @db_obj.list_column_formats(@table_name)
  end

  #####
  # change_format_add_columns
  #
  # adds one or more columns to the table managed by this object.
  # column_formats: array of [column_name, column_format] string pairs.
  # Every column name has to start with the 'addcol_prefix' given to new()
  # (no restriction if no prefix was given).
  #
  # Raises RuntimeError on an empty list or a column without the prefix.
  def change_format_add_columns(column_formats)
    if column_formats.nil? || column_formats.empty?
      raise "Need nonempty column_formats list"
    end

    # literal prefix comparison; an empty prefix accepts every name
    prefix = @addcol_prefix.to_s
    column_formats.each do |col_name, _col_format|
      unless col_name.to_s.start_with?(prefix)
        raise "Columns that are added need to have prefix #{@addcol_prefix}!"
      end
    end

    execute_command(SQLQuery.add_columns(@table_name, column_formats))
  end

  #####
  # change_format_remove_column
  #
  # removes one column from the table managed by this object.
  # column_name: string: name of the column to remove.
  # Prints a warning and does nothing if the table has no such column.
  def change_format_remove_column(column_name)
    unless list_column_names.include?(column_name)
      $stderr.puts "WARNING: Cannot remove column #{column_name}: I don't have it"
      return
    end

    execute_command("ALTER TABLE #{@table_name} DROP COLUMN #{column_name}")
  end

  #####
  # insert_row
  #
  # inserts a new row into the table and fills cells with values, as specified
  # by column_value_pairs: array of [column_name, column_value] pairs
  #
  # Raises RuntimeError on an empty list.
  def insert_row(column_value_pairs)
    if column_value_pairs.nil? || column_value_pairs.empty?
      raise "Need nonempty column_value_pairs list"
    end

    execute_command(SQLQuery.insert(@table_name, column_value_pairs))
  end

  #####
  # update_row
  #
  # update column values for a given row which is identified
  # via its (autoincrement) index
  #
  # index:              content of the autoincrement column for that row
  # column_value_pairs: array of [column_name, column_value] pairs
  #
  # Raises RuntimeError on an empty list.
  def update_row(index, column_value_pairs)
    if column_value_pairs.nil? || column_value_pairs.empty?
      raise "Need nonempty column_value_pairs list"
    end

    execute_command(SQLQuery.update(@table_name,
                                    column_value_pairs,
                                    [ValueRestriction.new(@index_name, index)]))
  end

  ####
  private

  ###
  # mode 'new': check the parameters, drop an existing table of the same
  # name, and create the table afresh.
  def init_new_table(hash)
    # sanity check: exactly the required parameters present?
    unless hash.keys.sort == ['addcol_prefix', 'col_formats', 'index_cols']
      raise "Expecting hash parameters 'addcol_prefix', 'col_formats', 'index_cols'.\n" +
            "I got: " + hash.keys.join(", ")
    end

    # sanity check: main index column name should be unique
    all_column_names = hash['col_formats'].map { |name, _format| name }
    if all_column_names.include?(@index_name)
      raise "[DBTable] You used the reserved name #{@index_name} as a column name. Please don't do that!"
    end

    # sanity check: index_column_names should be included in column_names
    hash['index_cols'].each do |name|
      unless all_column_names.include?(name)
        raise "[DBTable] #{name} is in the list of index names, but it isn't in the list of column names."
      end
    end

    # does a table with name table_name exist? if so, remove it
    @db_obj.drop_table(@table_name) if @db_obj.list_tables.include?(@table_name)

    @db_obj.create_table(@table_name, hash['col_formats'],
                         hash['index_cols'], @index_name)
  end

  ###
  # mode 'open': check the parameters, make sure the table exists and,
  # if 'col_names' is given, that its column names match.
  def init_existing_table(hash)
    # sanity check: only known parameters present?
    unless (hash.keys - ['addcol_prefix', 'col_names']).empty?
      raise "Expecting hash parameters 'addcol_prefix', 'col_names'.\n" +
            "I got: " + hash.keys.join(", ")
    end

    # sanity check: main index column name should be unique
    if hash['col_names'] && hash['col_names'].include?(@index_name)
      raise "[DBTable] You used the reserved name #{@index_name} as a column name. Please don't do that!"
    end

    # does a table with name table_name exist?
    unless @db_obj.list_tables.include?(@table_name)
      raise "[DBTable] Sorry, I cannot find a database table named #{@table_name}."
    end

    # no column names given: no check of column names
    return unless hash['col_names']

    # Compare only the "core" columns: leave out the autoincrement index and
    # the additional (prefixed) columns. The prefix is compared literally;
    # without a prefix no column counts as additional.
    prefix = hash['addcol_prefix'].to_s
    existing_fields = @db_obj.list_column_names(@table_name).reject do |col|
      col == @index_name || (!prefix.empty? && col.to_s.start_with?(prefix))
    end

    return if existing_fields.sort == hash['col_names'].sort

    raise "[DBTable] Column names in the DB table #{@table_name}\n" +
          "don't match feature specification in the experiment file.\n" +
          "Table:\n\t" + existing_fields.sort.join(", ") +
          "\n\nExp. file:\n\t" + hash['col_names'].sort.join(", ")
  end

  ###
  # execute_command:
  # execute DB command via the wrapper's query_noretv.
  # The commands in this class are all not of the kind that requires
  # a return value.
  #
  # On any error the command and the error message are printed to stderr
  # and the program exits with status 1.
  def execute_command(command)
    @db_obj.query_noretv(command)
  rescue StandardError => e
    $stderr.puts "Error executing SQL query. Command was:\n#{command}"
    $stderr.puts "Error message: #{e.message}"
    exit 1
  end
end
@@ -0,0 +1,416 @@
1
+ # class DBView
2
+ # KE, SP 27.1.05
3
+ #
4
+ # builds on class DBTable, which offers access to a database table
5
+ # extracts views of the table (select columns, select rows)
6
+ # and offers access methods for these views.
7
+ # Rows of the table can be returned either as hashes or as arrays.
8
+ #
9
+ # There is a special column of the table (the name of which we get in the new() method),
10
+ # the gold column.
11
+ # It can be returned directly, or modified by some "dynamic feature object",
12
+ # and its value (modified or unmodified) will always be last in the array representation of a row.
13
+
14
+ require 'db/sql_query'
15
+ require "ruby_class_extensions"
16
+ # require "RosyConventions"
17
+ require 'db/select_table_and_columns'
18
+
19
# A view on one or more DBTable objects: selects columns and rows and offers
# access methods for the result. Rows can be returned either as hashes or as
# arrays. One column, the gold column, can be mapped through a "dynamic gold"
# object and is always last in the array representation of a row.
class DBView
  # Stand-in for a DynGold object that leaves the gold value unchanged.
  # Used when no dynamic gold ID is requested or the requested ID is unknown.
  class UnchangedGold
    def make(x)
      x
    end

    def id
      "gold"
    end
  end

  ################
  # new
  #
  # prepare a view.
  # given a list of DB tables to access, each with its
  # set of features to be returned in the view,
  # a set of value restrictions,
  # the name of the gold feature,
  # and a list of objects that manipulate the gold feature into alternate
  # gold features.
  #
  # value_restrictions restricts the view to those rows for which the value
  # restrictions hold, e.g. only those rows where frame = Bla, or only those
  # rows where partofspeech = Blupp
  #
  # For every table the view also queries that table's index column, so that
  # rows can be updated later (see update_column).
  #
  # Parameters:
  #   table_col_pairs:    array:SelectTableAndColumns objects
  #   value_restrictions: array:ValueRestriction objects
  #   db_obj:             database object offering query(), already connected
  #   parameters:         hash with the following optional entries:
  #     "gold":
  #        string: name of the gold feature
  #        If you want the gold feature to be mapped using a DynGold object,
  #        you need to specify this parameter -- and you need to include
  #        the gold feature in some feature list.
  #     "dynamic_feature_list":
  #        array:DynGold objects, list of objects that map the gold feature
  #        to a different feature value (e.g. to "FE", "NONE").
  #        DynGold objects have one method make: string -> string
  #        that maps one gold feature, and one method id: -> string by which
  #        the object can be chosen.
  #     "standard_dyngold_id":
  #        string: DynGold ID used when each_hash, each_array, each_sentence
  #        are called without an ID. If it is not given either, the gold
  #        column is left unchanged.
  #     "sentence_id_feature":
  #        string: feature name for the sentence ID column,
  #        needed for each_sentence()
  #     The whole hash is also passed on to SQLQuery.select.
  def initialize(table_col_pairs, value_restrictions, db_obj, parameters = {})
    @db_obj = db_obj
    @table_col_pairs = table_col_pairs
    @parameters = parameters
    @map_gold = false
    @gold_index = nil
    # index_tables: Hash: table name => query result for that table's index column
    @index_tables = {}

    # view empty? yes if there is no table, or no table asks for any column
    @view_empty = @table_col_pairs.empty? ||
                  @table_col_pairs.all? { |tc| tc.columns.is_a?(Array) && tc.columns.empty? }
    return if @view_empty

    # okay, we can make the view, it contains at least one table and
    # at least one column:
    # do one query for all columns requested, and one for the index of each table
    @main_table = execute_command(SQLQuery.select(@table_col_pairs,
                                                  value_restrictions, parameters))

    @table_col_pairs.each_with_index do |tc, index|
      # read index column of this table, add all the other tables
      # with empty column lists
      index_table_col_pairs = @table_col_pairs.each_with_index.map do |other_tc, other_index|
        if other_index == index
          SelectTableAndColumns.new(tc.table_obj, [tc.table_obj.index_name])
        else
          SelectTableAndColumns.new(other_tc.table_obj, nil)
        end
      end
      @index_tables[tc.table_obj.table_name] =
        execute_command(SQLQuery.select(index_table_col_pairs,
                                        value_restrictions, parameters))
    end

    # map gold to something else? yes, if parameters["gold"] has been set
    return unless @parameters["gold"]

    @map_gold = true
    # remember which column of the view is the gold column
    # (nil if the gold feature is not among the view's columns)
    @gold_index = column_names.index(@parameters["gold"])
  end

  ################
  # close
  #
  # to be called when the view is no longer needed:
  # frees the query result objects underlying this view
  def close
    return if @view_empty

    @main_table.free
    @index_tables.each_value { |t| t.free }
  end

  ################
  # write_to_file
  #
  # writes instances to a file, one per line,
  # each instance given as a comma-separated list of features
  # (see each_instance_s).
  #
  # file:        stream to write to
  # dyn_gold_id: string: ID of a DynGold object from the dynamic_feature_list.
  #              if nil, the standard gold is used
  def write_to_file(file, dyn_gold_id = nil)
    each_instance_s(dyn_gold_id) { |instance_string| file.puts instance_string }
  end

  ################
  # each_instance_s
  #
  # yields each instance as a string:
  # a comma-separated list of the values yielded by each_array,
  # so (dynamic) gold is last.
  #
  # guarantees that comma is used only to separate features: commas inside
  # a value are replaced by "COMMA" -- but no other changes are made to
  # the feature values
  #
  # dyn_gold_id: string: ID of a DynGold object from the dynamic_feature_list.
  #              if nil, the standard gold is used
  def each_instance_s(dyn_gold_id = nil)
    each_array(dyn_gold_id) do |array|
      yield array.map { |entry| entry.to_s.gsub(/,/, "COMMA") }.join(",")
    end
  end

  ################
  # each_hash
  #
  # iterates over hashes representing rows.
  # If a gold feature was given to new(), its value in each row is replaced
  # by the value mapped through the DynGold object selected by dyn_gold_id
  # (string ID from the dynamic_feature_list, or nil for the standard one).
  # The mapped value is stored under the name of the gold feature.
  #
  # yields: hashes column_name -> column_value
  def each_hash(dyn_gold_id = nil)
    return if @view_empty

    dyn_gold_obj = fetch_dyn_gold_obj(dyn_gold_id) if @map_gold
    @main_table.reset

    @main_table.each_hash do |row_hash|
      if @map_gold
        row_hash[@parameters["gold"]] = dyn_gold_obj.make(row_hash[@parameters["gold"]])
      end

      yield row_hash
    end
  end

  ################
  # each_array
  #
  # iterates over arrays representing rows.
  # If the gold feature is a column of the view, it is moved to the end of
  # each row, mapped through the DynGold object selected by dyn_gold_id
  # (string ID from the dynamic_feature_list, or nil for the standard one).
  #
  # yields: arrays of column values, (dynamic) gold is last
  def each_array(dyn_gold_id = nil)
    return if @view_empty

    dyn_gold_obj = fetch_dyn_gold_obj(dyn_gold_id) if @map_gold
    @main_table.reset

    @main_table.each do |row|
      # @gold_index is only ever set together with @map_gold
      row.push(dyn_gold_obj.make(row.delete_at(@gold_index))) if @gold_index

      yield row
    end
  end

  ################
  # update_column
  #
  # update a column for all rows of this view
  #
  # Given a column name to be updated, and a list of values,
  # update the appropriate column of each row of the view
  # with the value for that row. The column is updated in the first table
  # of the view that has a column with this name.
  #
  # the list has to have the same length as the view, as there must be
  # a value for each row of the view.
  #
  # name:   string: column name
  # values: array of Objects
  #
  # Raises RuntimeError on an empty view. Prints an error and exits with
  # status 1 if the lengths differ or the column is not in this view.
  def update_column(name, values)
    raise "Cannot update empty view" if @view_empty

    @table_col_pairs.each do |tc|
      has_column = (tc.columns.is_a?(Array) && tc.columns.include?(name)) ||
                   (tc.columns == "*" && tc.table_obj.list_column_names.include?(name))
      next unless has_column

      index_table = @index_tables[tc.table_obj.table_name]
      # sanity check: number of update entries must match
      # number of entries in this view
      unless values.length == index_table.num_rows
        $stderr.puts "Error: length of value array (#{values.length}) is not equal to length of view (#{index_table.num_rows})!"
        exit 1
      end

      index_table.reset

      # walk the index column in parallel with the values
      values.each do |value|
        index = index_table.fetch_row.first
        tc.table_obj.update_row(index, [[name, value]])
      end

      return
    end

    # no match found
    $stderr.puts "View.rb Error: cannot update a column that is not in this view: #{name}"
    exit 1
  end

  ################
  # each_sentence
  #
  # like each_hash, but it groups the row hashes sentence-wise.
  # sentence boundaries in the view are detected by the change in a
  # special column describing sentence IDs (parameter "sentence_id_feature")
  #
  # dyn_gold_id: string: ID of a DynGold object from the dynamic_feature_list, or nil
  #
  # yields: arrays of hashes column_name -> column_value
  #
  # Raises RuntimeError if the sentence ID feature is unknown or not
  # a column of this view.
  def each_sentence(dyn_gold_id = nil)
    sent_id_feature = @parameters["sentence_id_feature"]

    # sanity check 1: need to know what the sentence ID is
    unless sent_id_feature
      raise "I need the name of the sentence ID feature for each_sentence"
    end
    # sanity check 2: the view needs to include the sentence ID
    unless column_names.include?(sent_id_feature)
      raise "View.each_sentence: Cannot do this without sentence ID in the view"
    end

    last_sent_id = nil
    sentence = []
    each_hash(dyn_gold_id) do |row_hash|
      if !last_sent_id.nil? && last_sent_id != row_hash[sent_id_feature]
        yield sentence
        sentence = []
      end
      last_sent_id = row_hash[sent_id_feature]
      sentence << row_hash
    end
    yield sentence unless sentence.empty?
  end

  ######################
  # length
  #
  # returns the length of the view: the number of its rows
  # (0 for an empty view)
  def length
    return 0 if @view_empty

    @index_tables[@table_col_pairs.first.table_obj.table_name].num_rows
  end

  ###
  private

  ################
  # column_names
  #
  # returns: array:string
  # the list of column names for this view, as reported by the main
  # query result; empty for an empty view
  def column_names
    return [] if @view_empty

    @main_table.list_column_names
  end

  ######
  # fetch_dyn_gold_obj
  #
  # given an ID of a gold object, look for the DynGold object
  # with this ID in the dynamic_feature_list and return it.
  # If the ID is nil, use the standard dynamic gold ID that
  # has been set in the new() method.
  # If that is nil too, or no object with the ID can be found (a warning is
  # printed then), take the non-modified gold as a default: return an
  # object with a make() method that just returns its parameter.
  #
  # returns: object offering a make() method
  # Raises RuntimeError if an ID is requested but no dynamic_feature_list
  # was given.
  def fetch_dyn_gold_obj(dyn_gold_id)
    dyn_gold_id = @parameters["standard_dyngold_id"] if dyn_gold_id.nil?

    # no dynamic gold ID: use unchanged gold by default
    return UnchangedGold.new unless dyn_gold_id

    raise "No dynamic features given" unless @parameters["dynamic_feature_list"]

    dyn_gold_obj = @parameters["dynamic_feature_list"].detect { |obj| obj.id == dyn_gold_id }
    return dyn_gold_obj if dyn_gold_obj

    $stderr.puts "View.rb: Unknown DynGold ID #{dyn_gold_id}"
    $stderr.puts "Using unchanged gold"
    UnchangedGold.new
  end

  ###
  # execute_command
  #
  # runs a query via the database object and returns its result.
  # On an error the command and the error details are printed to stderr
  # and the error is re-raised. Error classes are not named here so that
  # this works regardless of which database backend is loaded.
  def execute_command(command)
    @db_obj.query(command)
  rescue StandardError => e
    $stderr.puts "Error executing SQL query. Command was:\n#{command}"
    $stderr.puts "Error code: #{e.errno}" if e.respond_to?(:errno)
    $stderr.puts "Error message: #{e.respond_to?(:error) ? e.error : e.message}"
    raise
  end
end