shalmaneser-lib 1.2.rc5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.yardopts +10 -0
- data/CHANGELOG.md +4 -0
- data/LICENSE.md +4 -0
- data/README.md +122 -0
- data/lib/configuration/config_data.rb +457 -0
- data/lib/configuration/config_format_element.rb +210 -0
- data/lib/configuration/configuration_error.rb +15 -0
- data/lib/configuration/external_config_data.rb +56 -0
- data/lib/configuration/frappe_config_data.rb +134 -0
- data/lib/configuration/fred_config_data.rb +199 -0
- data/lib/configuration/rosy_config_data.rb +126 -0
- data/lib/db/db_interface.rb +50 -0
- data/lib/db/db_mysql.rb +141 -0
- data/lib/db/db_sqlite.rb +280 -0
- data/lib/db/db_table.rb +237 -0
- data/lib/db/db_view.rb +416 -0
- data/lib/db/db_wrapper.rb +175 -0
- data/lib/db/select_table_and_columns.rb +10 -0
- data/lib/db/sql_query.rb +243 -0
- data/lib/definitions.rb +19 -0
- data/lib/eval.rb +482 -0
- data/lib/ext/maxent/Classify.class +0 -0
- data/lib/ext/maxent/Train.class +0 -0
- data/lib/external_systems.rb +251 -0
- data/lib/framenet_format/fn_corpus_aset.rb +209 -0
- data/lib/framenet_format/fn_corpus_xml_file.rb +120 -0
- data/lib/framenet_format/fn_corpus_xml_sentence.rb +299 -0
- data/lib/framenet_format/fn_database.rb +143 -0
- data/lib/framenet_format/frame_xml_file.rb +104 -0
- data/lib/framenet_format/frame_xml_sentence.rb +411 -0
- data/lib/logging.rb +25 -0
- data/lib/ml/classifier.rb +189 -0
- data/lib/ml/mallet.rb +236 -0
- data/lib/ml/maxent.rb +229 -0
- data/lib/ml/optimize.rb +195 -0
- data/lib/ml/timbl.rb +140 -0
- data/lib/monkey_patching/array.rb +82 -0
- data/lib/monkey_patching/enumerable_bool.rb +24 -0
- data/lib/monkey_patching/enumerable_distribute.rb +18 -0
- data/lib/monkey_patching/file.rb +131 -0
- data/lib/monkey_patching/subsumed.rb +24 -0
- data/lib/ruby_class_extensions.rb +4 -0
- data/lib/salsa_tiger_xml/corpus.rb +24 -0
- data/lib/salsa_tiger_xml/fe_node.rb +98 -0
- data/lib/salsa_tiger_xml/file_parts_parser.rb +214 -0
- data/lib/salsa_tiger_xml/frame_node.rb +145 -0
- data/lib/salsa_tiger_xml/graph_node.rb +347 -0
- data/lib/salsa_tiger_xml/reg_xml.rb +285 -0
- data/lib/salsa_tiger_xml/salsa_tiger_sentence.rb +596 -0
- data/lib/salsa_tiger_xml/salsa_tiger_sentence_graph.rb +333 -0
- data/lib/salsa_tiger_xml/salsa_tiger_sentence_sem.rb +438 -0
- data/lib/salsa_tiger_xml/salsa_tiger_xml_helper.rb +84 -0
- data/lib/salsa_tiger_xml/salsa_tiger_xml_node.rb +161 -0
- data/lib/salsa_tiger_xml/sem_node.rb +58 -0
- data/lib/salsa_tiger_xml/string_terminals_in_right_order.rb +192 -0
- data/lib/salsa_tiger_xml/syn_node.rb +169 -0
- data/lib/salsa_tiger_xml/tree_node.rb +59 -0
- data/lib/salsa_tiger_xml/ts_syn_node.rb +47 -0
- data/lib/salsa_tiger_xml/usp_node.rb +72 -0
- data/lib/salsa_tiger_xml/xml_node.rb +163 -0
- data/lib/shalmaneser/lib.rb +1 -0
- data/lib/tabular_format/fn_tab_format_file.rb +38 -0
- data/lib/tabular_format/fn_tab_frame.rb +67 -0
- data/lib/tabular_format/fn_tab_sentence.rb +169 -0
- data/lib/tabular_format/tab_format_file.rb +91 -0
- data/lib/tabular_format/tab_format_named_args.rb +184 -0
- data/lib/tabular_format/tab_format_sentence.rb +119 -0
- data/lib/value_restriction.rb +49 -0
- metadata +131 -0
data/lib/db/db_table.rb
ADDED
@@ -0,0 +1,237 @@
|
|
1
|
+
# class DBTable
|
2
|
+
# KE, SP 27.1.05
|
3
|
+
#
|
4
|
+
# Manages one table in a (given) SQL database
|
5
|
+
# Doesn't know anything about the ROSY application
|
6
|
+
# Just creating a table, changing the table, and accessing it.
|
7
|
+
#
|
8
|
+
|
9
|
+
require 'db/sql_query'
|
10
|
+
# require "RosyConventions"
|
11
|
+
|
12
|
+
class DBTable
|
13
|
+
attr_reader :index_name, :table_name
|
14
|
+
|
15
|
+
#####
|
16
|
+
# new
|
17
|
+
#
|
18
|
+
# creates the table for this object.
|
19
|
+
# The name of the table (given as parameter) can be new, in which case the table
|
20
|
+
# is created, or old, in which case we check whether its format matches the format
|
21
|
+
# given in the parameters.
|
22
|
+
#
|
23
|
+
# The table format is given in the form of column formats (column names and column formats,
|
24
|
+
# formats are the usual SQLy things). Additionally, a subset of the column names can be
|
25
|
+
# designated index columns, which means that the table is indexed (and can be searched quickly)
|
26
|
+
# for them.
|
27
|
+
#
|
28
|
+
# DBTable internally constructs a "Primary index" feature that is called "XXindexXX" (autoincrement column)
|
29
|
+
#
|
30
|
+
# For all columns that are added later using add_columns, DBTable adds a prefix to the column names;
|
31
|
+
# these columns are not checked against the column_formats when opening an existing table;
|
32
|
+
# this can be used to store experiment-specific data.
|
33
|
+
|
34
|
+
def initialize(db_obj,     # DBWrapper object
               table_name, # string: name of DB table (existing/new)
               mode,       # 'new': starts new DB table, removes old if it exists. 'open': reopens existing DB table
               hash = {})  # hash: parameter name => parameter value, depending on mode
  # mode 'new' needs exactly these keys:
  #  'col_formats':   array of pairs [column_name, column_format] (both strings)
  #  'index_cols':    array of strings: column names that should be used to index the table
  #  'addcol_prefix': string: prefix for names of additional columns
  # mode 'open' accepts these optional keys:
  #  'col_names':     array of strings: expected column names.
  #                   May be nil/absent, in that case the column names are not tested.
  #  'addcol_prefix': string: existing columns whose names start with this prefix
  #                   are ignored when comparing against 'col_names'
  #
  # Raises RuntimeError if the parameters are inconsistent, if the table to
  # be opened does not exist, or if mode is neither 'new' nor 'open'.

  @index_name = "XXindexXX"
  @db_obj = db_obj
  @table_name = table_name
  # Keep the prefix: change_format_add_columns checks new column names against it.
  @addcol_prefix = hash['addcol_prefix']

  case mode
  when 'new'
    ###
    # create a new database table

    # sanity check: exactly the required parameters present?
    unless hash.keys.sort == ['addcol_prefix', 'col_formats', 'index_cols']
      raise "Expecting hash parameters 'addcol_prefix', 'col_formats', 'index_cols'.\n" +
            "I got: " + hash.keys.join(", ")
    end

    # sanity check: the reserved index column name must not be used as a column name
    all_column_names = hash['col_formats'].map { |name, _format| name }
    if all_column_names.include? @index_name
      raise "[DBTable] You used the reserved name #{@index_name} as a column name. Please don't do that!"
    end

    # sanity check: index_column_names should be included in column_names
    hash['index_cols'].each { |name|
      unless all_column_names.include? name
        raise "[DBTable] #{name} is in the list of index names, but it isn't in the list of column names."
      end
    }

    # a table of this name already exists: remove it before creating the new one
    @db_obj.drop_table(table_name) if @db_obj.list_tables.include? table_name

    @db_obj.create_table(table_name, hash['col_formats'],
                         hash['index_cols'], @index_name)
  when 'open'
    ###
    # open existing database table

    # sanity check: only known parameters present?
    hash.keys.each { |key|
      unless ['addcol_prefix', 'col_names'].include? key
        raise "Expecting hash parameters 'addcol_prefix', 'col_names'.\n" +
              "I got: " + hash.keys.join(", ")
      end
    }

    # sanity check: the reserved index column name must not be used as a column name
    if hash['col_names'] && hash['col_names'].include?(@index_name)
      raise "[DBTable] You used the reserved name #{@index_name} as a column name. Please don't do that!"
    end

    # does a table with name table_name exist?
    unless @db_obj.list_tables.include? table_name
      raise "[DBTable] Sorry, I cannot find a database table named #{table_name}."
    end

    # check that the column names match; skipped if no column names were given
    if hash['col_names']
      # Compare the prefix literally, and only if one was actually given:
      # an absent/empty prefix must not make every column look "additional".
      prefix = hash['addcol_prefix'].to_s
      existing_fields = @db_obj.list_column_names(table_name).reject { |col|
        col == @index_name || (!prefix.empty? && col.to_s.start_with?(prefix))
      }

      unless existing_fields.sort == hash["col_names"].sort
        raise "[DBTable] Column names in the DB table #{table_name}\n" +
              "don't match feature specification in the experiment file.\n" +
              "Table:\n\t" + existing_fields.sort.join(", ") +
              "\n\nExp. file:\n\t" + hash["col_names"].sort.join(", ")
      end
    end
  else
    raise "Parameter 'mode' needs to be either 'new' or 'open'! I got " + mode.to_s
  end
end
|
129
|
+
|
130
|
+
#####
|
131
|
+
# list_column_names
|
132
|
+
#
|
133
|
+
# list column names of this table
|
134
|
+
#
|
135
|
+
# returns: array:string, list of column names
|
136
|
+
def list_column_names
  # Delegates to the DB wrapper, asking for the columns of this object's table.
  # returns: whatever the wrapper returns (per the header: array of column name strings)
  @db_obj.list_column_names(@table_name)
end
|
139
|
+
|
140
|
+
#####
|
141
|
+
# list_column_formats
|
142
|
+
#
|
143
|
+
# list column names and column types of this table
|
144
|
+
#
|
145
|
+
# returns: array:string*string, list of pairs [column name, column format]
|
146
|
+
def list_column_formats
  # Delegates to the DB wrapper, asking for the column formats of this object's table.
  # returns: whatever the wrapper returns (per the header: pairs [column name, column format])
  @db_obj.list_column_formats(@table_name)
end
|
149
|
+
|
150
|
+
#####
|
151
|
+
# change_format_add_columns
|
152
|
+
#
|
153
|
+
# adds one or more columns to the table managed by this object
|
154
|
+
# columns are given by their names and formats, as above
|
155
|
+
#
|
156
|
+
# returns: nothing
|
157
|
+
def change_format_add_columns(column_formats) # array: string*string [column_name,column_format]
  # Adds the given columns to this table via SQLQuery.add_columns.
  # Every new column name has to start with @addcol_prefix; if no prefix is
  # set (nil or empty), every name is accepted.
  #
  # Raises RuntimeError if column_formats is nil/empty or a name lacks the prefix.

  if column_formats.nil? || column_formats.empty?
    raise "Need nonempty column_formats list"
  end

  # Literal prefix comparison: the prefix is plain text, not a regular
  # expression, so characters like "." must not be treated as wildcards.
  required_prefix = @addcol_prefix.to_s
  column_formats.each { |col_name, _col_format|
    unless col_name && col_name.to_s.start_with?(required_prefix)
      raise "Columns that are added need to have prefix #{@addcol_prefix}!"
    end
  }

  execute_command(SQLQuery.add_columns(@table_name, column_formats))
end
|
171
|
+
|
172
|
+
#####
|
173
|
+
# change_format_remove_column
|
174
|
+
#
|
175
|
+
# removes one column from the table managed by this object
|
176
|
+
#
|
177
|
+
# returns: nothing
|
178
|
+
def change_format_remove_column(column_name) # string:name of the column to remove
  # Drops one column from this table. If the table has no such column, a
  # warning is written to stderr and nothing is executed.
  #
  # list_column_names takes no argument (it already uses @table_name);
  # passing one raised ArgumentError on every call.
  unless list_column_names.include? column_name
    $stderr.puts "WARNING: Cannot remove column #{column_name}: I don't have it"
    return
  end

  execute_command("ALTER TABLE #{@table_name} DROP COLUMN #{column_name}")
end
|
186
|
+
|
187
|
+
|
188
|
+
#####
|
189
|
+
# insert_row
|
190
|
+
#
|
191
|
+
# inserts a new row into the table and fills cells with values, as specified
|
192
|
+
# by the column_value_pairs
|
193
|
+
#
|
194
|
+
# returns: nothing
|
195
|
+
def insert_row(column_value_pairs) # array: string*Object [column_name,column_value]
  # Builds an INSERT statement for this table via SQLQuery.insert and executes it.
  # Raises RuntimeError if no column/value pairs are given.
  nothing_to_insert = column_value_pairs.nil? || column_value_pairs.empty?
  raise "Need nonempty column_value_pairs list" if nothing_to_insert

  statement = SQLQuery.insert(@table_name, column_value_pairs)
  execute_command(statement)
end
|
201
|
+
|
202
|
+
#####
|
203
|
+
# update_row
|
204
|
+
#
|
205
|
+
# update column values for a given row which is identified
|
206
|
+
# via its (autoincrement) index
|
207
|
+
#
|
208
|
+
# returns: nothing
|
209
|
+
def update_row(index,              # index, content of autoincrement column
               column_value_pairs) # array: string*Object [column_name, column_value]
  # Updates the given columns of the single row whose autoincrement index
  # column (@index_name) equals index.
  # Raises RuntimeError if no column/value pairs are given.
  nothing_to_update = column_value_pairs.nil? || column_value_pairs.empty?
  raise "Need nonempty column_value_pairs list" if nothing_to_update

  # restrict the update to the row with this index
  row_selector = [ValueRestriction.new(@index_name, index)]
  statement = SQLQuery.update(@table_name, column_value_pairs, row_selector)
  execute_command(statement)
end
|
219
|
+
|
220
|
+
####
|
221
|
+
private
|
222
|
+
|
223
|
+
###
|
224
|
+
# execute_command:
|
225
|
+
# execute DB command
|
226
|
+
#
|
227
|
+
# returns nil: the commands in this package are all
|
228
|
+
# not of the kind that requires a return value
|
229
|
+
def execute_command(command)
  # Runs a command on the DB wrapper via query_noretv.
  # On any StandardError the failing command and the underlying error
  # message are written to stderr and the process exits with status 1.
  @db_obj.query_noretv(command)
rescue StandardError => e
  $stderr.puts "Error executing SQL query. Command was:\n#{command}"
  # report the cause as well; without it the failure cannot be diagnosed
  $stderr.puts "Error message: #{e.message}"
  exit 1
end
|
237
|
+
end
|
data/lib/db/db_view.rb
ADDED
@@ -0,0 +1,416 @@
|
|
1
|
+
# class DBView
|
2
|
+
# KE, SP 27.1.05
|
3
|
+
#
|
4
|
+
# builds on class DBTable, which offers access to a database table
|
5
|
+
# extract views of the table (select columns, select rows)
|
6
|
+
# and offers access methods for these views.
|
7
|
+
# Rows of the table can be returned either as hashes or as arrays.
|
8
|
+
#
|
9
|
+
# There is a special column of the table (the name of which we get in the new() method),
|
10
|
+
# the gold column.
|
11
|
+
# It can be returned directly, or modified by some "dynamic feature object",
|
12
|
+
# and its value (modified or unmodified) will always be last in the array representation of a row.
|
13
|
+
|
14
|
+
require 'db/sql_query'
|
15
|
+
require "ruby_class_extensions"
|
16
|
+
# require "RosyConventions"
|
17
|
+
require 'db/select_table_and_columns'
|
18
|
+
|
19
|
+
class DBView
|
20
|
+
|
21
|
+
################
|
22
|
+
# new
|
23
|
+
#
|
24
|
+
# prepare a view.
|
25
|
+
# given a list of DB tables to access, each with its
|
26
|
+
# set of features to be returned in the view,
|
27
|
+
# a set of value restrictions,
|
28
|
+
# the name of the gold feature,
|
29
|
+
# and a list of objects that manipulate the gold feature into alternate
|
30
|
+
# gold features.
|
31
|
+
#
|
32
|
+
# value_restrictions restricts the view to those rows for which the value restrictions hold,
|
33
|
+
# e.g. only those rows where frame = Bla, or only those rows where partofspeech = Blupp
|
34
|
+
#
|
35
|
+
# The view remembers the indices of the _first_ table in the list of tables
|
36
|
+
# it is given.
|
37
|
+
#
|
38
|
+
# A standard dynamic ID can be given: DynGold objects all have an id() method,
|
39
|
+
# which returns a string, by which the use of the object can be requested
|
40
|
+
# of the view. If no dynamic ID is given below in methods each_array,
|
41
|
+
# each_hash, each_sentence, the system falls back to the standard dynamic ID.
|
42
|
+
# if none is given here, the standard DynGold object is the one that doesn't
|
43
|
+
# change the gold column. If one is given here, it will be used by default
|
44
|
+
# when no ID is given in each_hash, each_array, each_sentence
|
45
|
+
#
|
46
|
+
# The last parameter is a hash with the following optional entries:
|
47
|
+
# "gold":
|
48
|
+
# string: name of the gold feature
|
49
|
+
# If you want the gold feature to be mapped using a DynGold object,
|
50
|
+
# you need to specify this parameter -- and you need to include
|
51
|
+
# the gold feature in some feature_list.
|
52
|
+
# Warning: if a feature of this name appears in several of the
|
53
|
+
# feature lists, only the first one is mapped
|
54
|
+
# "dynamic_feature_list":
|
55
|
+
# array:DynGold objects, list of objects that map the gold feature
|
56
|
+
# to a different feature value (e.g. to "FE", "NONE")
|
57
|
+
# DynGold objects have one method make: string -> string
|
58
|
+
# that maps one gold feature,
|
59
|
+
# and one method id: -> string that gives an ID unique to this DynGold class
|
60
|
+
# and by which this DynGold class can be chosen.
|
61
|
+
# "standard_dyngold_id":
|
62
|
+
# string: standard DynGold object ID (see above)
|
63
|
+
# "sentence_id_feature":
|
64
|
+
# string: feature name for the sentence ID column, needed for each_sentence()
|
65
|
+
#
|
66
|
+
# further parameters that are passed on to SQLQuery.select: see there
|
67
|
+
|
68
|
+
def initialize(table_col_pairs,    # array:SelectTableAndColumns objects
               value_restrictions, # array:ValueRestriction objects
               db_obj,             # DB object that already has access to the correct database
               parameters = {})    # hash with further parameters: "gold", "dynamic_feature_list",
                                   # "standard_dyngold_id", "sentence_id_feature", plus whatever
                                   # SQLQuery.select accepts
  # Builds the view: one query result for all requested columns
  # (@main_table) and, per table, one query result holding just that
  # table's index column (@index_tables).

  @db_obj = db_obj
  @table_col_pairs = table_col_pairs
  @parameters = parameters

  # Defined defaults, so that the object is in a consistent state even
  # when the view turns out to be empty and we return early.
  @index_tables = {}
  @map_gold = false
  @gold_index = nil

  # The view is empty if there are no tables at all, or if every table
  # comes with an explicitly empty column list.
  @view_empty = @table_col_pairs.empty? ||
                @table_col_pairs.all? { |tc| tc.columns.is_a?(Array) && tc.columns.empty? }
  return if @view_empty

  # okay, we can make the view, it contains at least one table and
  # at least one column.
  @main_table = execute_command(SQLQuery.select(@table_col_pairs,
                                                value_restrictions, parameters))

  # index_tables: Hash: table name => query result with that table's index column
  table_col_pairs.each_with_index do |tc, index|
    # read the index column of this table; all other tables take part in
    # the query without contributing columns
    index_table_col_pairs = @table_col_pairs.each_with_index.map do |other_tc, other_index|
      if other_index == index
        # the current table
        SelectTableAndColumns.new(tc.table_obj, [tc.table_obj.index_name])
      else
        # other table: keep just the table, not the columns
        SelectTableAndColumns.new(other_tc.table_obj, nil)
      end
    end
    @index_tables[tc.table_obj.table_name] =
      execute_command(SQLQuery.select(index_table_col_pairs,
                                      value_restrictions, parameters))
  end

  # map gold to something else? yes, if parameters["gold"] has been set
  if @parameters["gold"]
    @map_gold = true
    # remember which column of the view is the gold column
    # (nil if the gold feature is not among the view's columns)
    @gold_index = column_names.index(@parameters["gold"])
  end
end
|
123
|
+
|
124
|
+
################
|
125
|
+
# close
|
126
|
+
#
|
127
|
+
# to be called when the view is no longer needed:
|
128
|
+
# frees the DBResult objects underlying this view
|
129
|
+
def close
  # Releases the query results held by this view; an empty view holds none.
  return if @view_empty

  @main_table.free
  @index_tables.each_value(&:free)
end
|
135
|
+
|
136
|
+
################
|
137
|
+
# write_to_file
|
138
|
+
#
|
139
|
+
# writes instances to a file
|
140
|
+
# each instance given as a comma-separated list of features
|
141
|
+
# The features are the ones given in my_feature_list
|
142
|
+
# (parameter to the new() method) above, in that order,
|
143
|
+
# plus (dynamic) gold, which is last.
|
144
|
+
#
|
145
|
+
# guarantees that comma is used only to separate features -- but no other
|
146
|
+
# changes in the feature values
|
147
|
+
def write_to_file(file,              # stream to write to
                  dyn_gold_id = nil) # string: ID of a DynGold object from the dynamic_feature_list.
                                     # if nil, main gold is used
  # Writes one line per instance, using the string form produced by each_instance_s.
  each_instance_s(dyn_gold_id) do |line|
    file.puts line
  end
end
|
155
|
+
|
156
|
+
|
157
|
+
################
|
158
|
+
# each_instance_s
|
159
|
+
#
|
160
|
+
# yields each instance as a string:
|
161
|
+
# a comma-separated list of features
|
162
|
+
# The features are the ones given in my_feature_list
|
163
|
+
# (parameter to the new() method) above, in that order,
|
164
|
+
# plus (dynamic) gold, which is last.
|
165
|
+
#
|
166
|
+
# guarantees that comma is used only to separate features -- but no other
|
167
|
+
# changes in the feature values
|
168
|
+
def each_instance_s(dyn_gold_id = nil) # string: ID of a DynGold object from the dynamic_feature_list.
                                       # if nil, main gold is used
  # Yields each row of the view as one comma-separated string. Commas inside
  # a value are replaced by "COMMA" so that "," only ever separates features.
  each_array(dyn_gold_id) do |row|
    cells = row.map { |value| value.to_s.gsub(",", "COMMA") }
    yield cells.join(",")
  end
end
|
174
|
+
|
175
|
+
################
|
176
|
+
# each_hash
|
177
|
+
#
|
178
|
+
# iterates over hashes representing rows
|
179
|
+
# in each row, there is a gold key/value pair
|
180
|
+
# specified by the optional argument dyn_gold_id.
|
181
|
+
# which is the string ID of a DynGold object
|
182
|
+
# from the dynamic_feature_list.
|
183
|
+
# If arg is not present, main gold is used
|
184
|
+
#
|
185
|
+
# The key for the gold is the dyn_gold_id
|
186
|
+
# If that is nil, the key is 'gold'
|
187
|
+
#
|
188
|
+
# yields: hashes column_name -> column_value
|
189
|
+
def each_hash(dyn_gold_id = nil) # string: ID of a DynGold object from the dynamic_feature_list, or nil
  # Yields every row of the view as a hash column_name -> column_value.
  # If gold mapping is active, the value stored under the gold column's name
  # is replaced by the output of the selected DynGold object's make().
  return if @view_empty

  gold_mapper = fetch_dyn_gold_obj(dyn_gold_id) if @map_gold

  # start reading from the first row again
  @main_table.reset

  @main_table.each_hash do |row_hash|
    if @map_gold
      gold_key = @parameters["gold"]
      row_hash[gold_key] = gold_mapper.make(row_hash[gold_key])
    end

    yield row_hash
  end
end
|
206
|
+
|
207
|
+
################
|
208
|
+
# each_array
|
209
|
+
#
|
210
|
+
# iterates over arrays representing rows
|
211
|
+
# the last item of each row is the gold column
|
212
|
+
# selected by the optional argument dyn_gold_id.
|
213
|
+
# which is the string ID of a DynGold object
|
214
|
+
# from the dynamic_feature_list.
|
215
|
+
# If arg is not present, main gold is used
|
216
|
+
#
|
217
|
+
# yields: arrays of column values,
|
218
|
+
# values are in the order of my_feature_list given
|
219
|
+
# to the new() method, (dynamic) gold is last
|
220
|
+
def each_array(dyn_gold_id = nil) # string: ID of a DynGold object from the dynamic_feature_list, or nil
  # Yields every row of the view as an array of column values. If the
  # position of the gold column is known (@gold_index), that value is taken
  # out of the row and appended at the end, passed through the selected
  # DynGold object's make() when gold mapping is active.
  return if @view_empty

  gold_mapper = fetch_dyn_gold_obj(dyn_gold_id) if @map_gold

  # start reading from the first row again
  @main_table.reset

  @main_table.each do |row|
    if @gold_index
      gold_value = row.delete_at(@gold_index)
      row.push(@map_gold ? gold_mapper.make(gold_value) : gold_value)
    end

    yield row
  end
end
|
243
|
+
|
244
|
+
################
|
245
|
+
# update_column
|
246
|
+
#
|
247
|
+
# update a column for all rows of this view
|
248
|
+
#
|
249
|
+
# Given a column name to be updated, and a list of value tuples,
|
250
|
+
# update each row of the view, or rather the appropriate column of each row of the view,
|
251
|
+
# with values for that row.
|
252
|
+
#
|
253
|
+
# the list has the same length as the view, as there must be a value tuple
|
254
|
+
# for each row of the view.
|
255
|
+
#
|
256
|
+
# returns: nothing
|
257
|
+
def update_column(name,   # string: column name
                  values) # array of Objects
  # Writes values[i] into column `name` of the i-th row of this view.
  # The column is looked up in the first table of the view that offers it:
  # either listed explicitly, or present in the table when the column
  # spec is "*".
  # Raises RuntimeError on an empty view; exits with status 1 if the number
  # of values differs from the number of rows, or if no table has the column.
  raise "Cannot update empty view" if @view_empty

  target = @table_col_pairs.find do |tc|
    (tc.columns.is_a?(Array) && tc.columns.include?(name)) ||
      (tc.columns == "*" && tc.table_obj.list_column_names.include?(name))
  end

  if target.nil?
    # no match found
    $stderr.puts "View.rb Error: cannot update a column that is not in this view: #{name}"
    exit 1
  end

  table = target.table_obj
  index_table = @index_tables[table.table_name]

  # sanity check: number of update entries must match
  # number of entries in this view
  view_length = index_table.num_rows
  if values.length != view_length
    $stderr.puts "Error: length of value array (#{values.length}) is not equal to length of view (#{view_length})!"
    exit 1
  end

  # walk the stored row indices from the start, in step with the values
  index_table.reset
  values.each do |value|
    row_index = index_table.fetch_row.first
    table.update_row(row_index, [[name, value]])
  end

  nil
end
|
292
|
+
|
293
|
+
|
294
|
+
################
|
295
|
+
# each_sentence
|
296
|
+
#
|
297
|
+
# like each_hash, but it groups the row hashes sentence-wise
|
298
|
+
# sentence boundaries in the view are detected by the change in a
|
299
|
+
# special column describing sentence IDs
|
300
|
+
#
|
301
|
+
# also needs a dyngold object id
|
302
|
+
#
|
303
|
+
# returns: an array of hashes column_name -> column_value
|
304
|
+
def each_sentence(dyn_gold_id = nil) # string: ID of a DynGold object from the dynamic_feature_list, or nil
  # Groups the row hashes of each_hash into runs of consecutive rows that
  # share the same sentence ID, and yields each run as an array of hashes.
  # Raises RuntimeError if the sentence ID feature is unknown or not
  # part of the view.
  id_feature = @parameters["sentence_id_feature"]

  # sanity check 1: need to know what the sentence ID is
  raise "I need the name of the sentence ID feature for each_sentence" unless id_feature

  # sanity check 2: the view needs to include the sentence ID
  unless column_names.include? id_feature
    raise "View.each_sentence: Cannot do this without sentence ID in the view"
  end

  previous_id = nil
  current_rows = []
  each_hash(dyn_gold_id) do |row_hash|
    this_id = row_hash[id_feature]
    # a change of ID closes the sentence collected so far
    # (no sentence is open while previous_id is still nil)
    unless previous_id.nil? || previous_id == this_id
      yield current_rows
      current_rows = []
    end
    previous_id = this_id
    current_rows << row_hash
  end

  # hand out the last sentence, which no ID change has closed
  yield current_rows unless current_rows.empty?
end
|
330
|
+
|
331
|
+
######################
|
332
|
+
# length
|
333
|
+
#
|
334
|
+
# returns the length of the view: the number of its rows
|
335
|
+
def length
  # Number of rows of the view, read from the index table of the first
  # table in the view. An empty view has no index tables (and possibly no
  # tables at all), so its length is 0.
  return 0 if @view_empty

  @index_tables[@table_col_pairs.first.table_obj.table_name].num_rows
end
|
338
|
+
|
339
|
+
###
|
340
|
+
private
|
341
|
+
|
342
|
+
################
|
343
|
+
# column_names
|
344
|
+
#
|
345
|
+
# returns: array:string
|
346
|
+
# the list of column names for this view
|
347
|
+
# in the right order
|
348
|
+
def column_names
  # Column names of the view as reported by the main query result;
  # an empty view has no columns.
  @view_empty ? [] : @main_table.list_column_names
end
|
355
|
+
|
356
|
+
######
|
357
|
+
# fetch_dyn_gold_obj
|
358
|
+
#
|
359
|
+
# given an ID of a gold object, look for the DynGold object
|
360
|
+
# with this ID in the dynamic_feature_list and return it
|
361
|
+
# If the ID is nil, use the standard dynamic gold ID that
|
362
|
+
# has been set in the new() method.
|
363
|
+
# If that is nil too, take the non-modified gold as a
|
364
|
+
# default: return a dummy object with a make() method
|
365
|
+
# that just returns its parameter.
|
366
|
+
#
|
367
|
+
# returns: object offering a make() method
|
368
|
+
|
369
|
+
def fetch_dyn_gold_obj(dyn_gold_id) # string or nil
  # Finds the object that transforms the gold column.
  # - dyn_gold_id nil: fall back to @parameters["standard_dyngold_id"]
  # - an ID is known: return the object from
  #   @parameters["dynamic_feature_list"] whose id equals it
  # - no ID at all, or no object with that ID (a warning is printed):
  #   return an identity object whose make(x) returns x and whose id is "gold"
  #
  # Raises RuntimeError if an ID is requested but no dynamic feature list exists.
  dyn_gold_id = @parameters["standard_dyngold_id"] if dyn_gold_id.nil?

  if dyn_gold_id
    unless @parameters["dynamic_feature_list"]
      raise "No dynamic features given"
    end

    dyn_gold_obj = @parameters["dynamic_feature_list"].detect { |obj|
      obj.id == dyn_gold_id
    }
    return dyn_gold_obj unless dyn_gold_obj.nil?

    $stderr.puts "View.rb: Unknown DynGold ID #{dyn_gold_id}"
    $stderr.puts "Using unchanged gold"
  end

  # Unchanged gold: a fresh object that carries the two methods itself.
  # (Defining them via `class << obj` on a nil lookup result would add
  # make/id to NilClass for the whole program.)
  unchanged_gold = Object.new
  def unchanged_gold.make(x)
    x
  end

  def unchanged_gold.id
    "gold"
  end
  unchanged_gold
end
|
404
|
+
|
405
|
+
def execute_command(command)
  # Runs a query on the DB object and returns its result.
  # On any StandardError the failing command and the error details are
  # written to stderr, then the same exception is re-raised.
  #
  # The rescue clause deliberately does not name a driver-specific class
  # (e.g. MysqlError): when that constant is not loaded, naming it would
  # raise NameError and hide the real error.
  @db_obj.query(command)
rescue StandardError => e
  $stderr.puts "Error executing SQL query. Command was:\n#{command}"
  # errno/error are only offered by some driver exceptions
  $stderr.puts "Error code: #{e.errno}" if e.respond_to?(:errno)
  $stderr.puts "Error message: #{e.respond_to?(:error) ? e.error : e.message}"
  raise
end
|
415
|
+
|
416
|
+
end
|