frprep 0.0.1.prealpha
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.yardopts +8 -0
- data/CHANGELOG.rdoc +0 -0
- data/LICENSE.rdoc +0 -0
- data/README.rdoc +0 -0
- data/lib/common/AbstractSynInterface.rb +1227 -0
- data/lib/common/BerkeleyInterface.rb +375 -0
- data/lib/common/CollinsInterface.rb +1165 -0
- data/lib/common/ConfigData.rb +694 -0
- data/lib/common/Counter.rb +18 -0
- data/lib/common/DBInterface.rb +48 -0
- data/lib/common/EnduserMode.rb +27 -0
- data/lib/common/Eval.rb +480 -0
- data/lib/common/FixSynSemMapping.rb +196 -0
- data/lib/common/FrPrepConfigData.rb +66 -0
- data/lib/common/FrprepHelper.rb +1324 -0
- data/lib/common/Graph.rb +345 -0
- data/lib/common/ISO-8859-1.rb +24 -0
- data/lib/common/ML.rb +186 -0
- data/lib/common/Maxent.rb +215 -0
- data/lib/common/MiniparInterface.rb +1388 -0
- data/lib/common/Optimise.rb +195 -0
- data/lib/common/Parser.rb +213 -0
- data/lib/common/RegXML.rb +269 -0
- data/lib/common/RosyConventions.rb +171 -0
- data/lib/common/SQLQuery.rb +243 -0
- data/lib/common/STXmlTerminalOrder.rb +194 -0
- data/lib/common/SalsaTigerRegXML.rb +2347 -0
- data/lib/common/SalsaTigerXMLHelper.rb +99 -0
- data/lib/common/SleepyInterface.rb +384 -0
- data/lib/common/SynInterfaces.rb +275 -0
- data/lib/common/TabFormat.rb +720 -0
- data/lib/common/Tiger.rb +1448 -0
- data/lib/common/TntInterface.rb +44 -0
- data/lib/common/Tree.rb +61 -0
- data/lib/common/TreetaggerInterface.rb +303 -0
- data/lib/common/headz.rb +338 -0
- data/lib/common/option_parser.rb +13 -0
- data/lib/common/ruby_class_extensions.rb +310 -0
- data/lib/fred/Baseline.rb +150 -0
- data/lib/fred/FileZipped.rb +31 -0
- data/lib/fred/FredBOWContext.rb +863 -0
- data/lib/fred/FredConfigData.rb +182 -0
- data/lib/fred/FredConventions.rb +232 -0
- data/lib/fred/FredDetermineTargets.rb +324 -0
- data/lib/fred/FredEval.rb +312 -0
- data/lib/fred/FredFeatureExtractors.rb +321 -0
- data/lib/fred/FredFeatures.rb +1061 -0
- data/lib/fred/FredFeaturize.rb +596 -0
- data/lib/fred/FredNumTrainingSenses.rb +27 -0
- data/lib/fred/FredParameters.rb +402 -0
- data/lib/fred/FredSplit.rb +84 -0
- data/lib/fred/FredSplitPkg.rb +180 -0
- data/lib/fred/FredTest.rb +607 -0
- data/lib/fred/FredTrain.rb +144 -0
- data/lib/fred/PlotAndREval.rb +480 -0
- data/lib/fred/fred.rb +45 -0
- data/lib/fred/md5.rb +23 -0
- data/lib/fred/opt_parser.rb +250 -0
- data/lib/frprep/AbstractSynInterface.rb +1227 -0
- data/lib/frprep/Ampersand.rb +37 -0
- data/lib/frprep/BerkeleyInterface.rb +375 -0
- data/lib/frprep/CollinsInterface.rb +1165 -0
- data/lib/frprep/ConfigData.rb +694 -0
- data/lib/frprep/Counter.rb +18 -0
- data/lib/frprep/FNCorpusXML.rb +643 -0
- data/lib/frprep/FNDatabase.rb +144 -0
- data/lib/frprep/FixSynSemMapping.rb +196 -0
- data/lib/frprep/FrPrepConfigData.rb +66 -0
- data/lib/frprep/FrameXML.rb +513 -0
- data/lib/frprep/FrprepHelper.rb +1324 -0
- data/lib/frprep/Graph.rb +345 -0
- data/lib/frprep/ISO-8859-1.rb +24 -0
- data/lib/frprep/MiniparInterface.rb +1388 -0
- data/lib/frprep/Parser.rb +213 -0
- data/lib/frprep/RegXML.rb +269 -0
- data/lib/frprep/STXmlTerminalOrder.rb +194 -0
- data/lib/frprep/SalsaTigerRegXML.rb +2347 -0
- data/lib/frprep/SalsaTigerXMLHelper.rb +99 -0
- data/lib/frprep/SleepyInterface.rb +384 -0
- data/lib/frprep/SynInterfaces.rb +275 -0
- data/lib/frprep/TabFormat.rb +720 -0
- data/lib/frprep/Tiger.rb +1448 -0
- data/lib/frprep/TntInterface.rb +44 -0
- data/lib/frprep/Tree.rb +61 -0
- data/lib/frprep/TreetaggerInterface.rb +303 -0
- data/lib/frprep/do_parses.rb +142 -0
- data/lib/frprep/frprep.rb +686 -0
- data/lib/frprep/headz.rb +338 -0
- data/lib/frprep/one_parsed_file.rb +28 -0
- data/lib/frprep/opt_parser.rb +94 -0
- data/lib/frprep/ruby_class_extensions.rb +310 -0
- data/lib/rosy/AbstractFeatureAndExternal.rb +240 -0
- data/lib/rosy/DBMySQL.rb +146 -0
- data/lib/rosy/DBSQLite.rb +280 -0
- data/lib/rosy/DBTable.rb +239 -0
- data/lib/rosy/DBWrapper.rb +176 -0
- data/lib/rosy/ExternalConfigData.rb +58 -0
- data/lib/rosy/FailedParses.rb +130 -0
- data/lib/rosy/FeatureInfo.rb +242 -0
- data/lib/rosy/GfInduce.rb +1115 -0
- data/lib/rosy/GfInduceFeature.rb +148 -0
- data/lib/rosy/InputData.rb +294 -0
- data/lib/rosy/RosyConfigData.rb +115 -0
- data/lib/rosy/RosyConfusability.rb +338 -0
- data/lib/rosy/RosyEval.rb +465 -0
- data/lib/rosy/RosyFeatureExtractors.rb +1609 -0
- data/lib/rosy/RosyFeaturize.rb +280 -0
- data/lib/rosy/RosyInspect.rb +336 -0
- data/lib/rosy/RosyIterator.rb +477 -0
- data/lib/rosy/RosyPhase2FeatureExtractors.rb +230 -0
- data/lib/rosy/RosyPruning.rb +165 -0
- data/lib/rosy/RosyServices.rb +744 -0
- data/lib/rosy/RosySplit.rb +232 -0
- data/lib/rosy/RosyTask.rb +19 -0
- data/lib/rosy/RosyTest.rb +826 -0
- data/lib/rosy/RosyTrain.rb +232 -0
- data/lib/rosy/RosyTrainingTestTable.rb +786 -0
- data/lib/rosy/TargetsMostFrequentFrame.rb +60 -0
- data/lib/rosy/View.rb +418 -0
- data/lib/rosy/opt_parser.rb +379 -0
- data/lib/rosy/rosy.rb +77 -0
- data/lib/shalmaneser/version.rb +3 -0
- data/test/frprep/test_opt_parser.rb +94 -0
- data/test/functional/functional_test_helper.rb +40 -0
- data/test/functional/sample_experiment_files/fred_test.salsa.erb +122 -0
- data/test/functional/sample_experiment_files/fred_train.salsa.erb +135 -0
- data/test/functional/sample_experiment_files/prp_test.salsa.erb +138 -0
- data/test/functional/sample_experiment_files/prp_test.salsa.fred.standalone.erb +120 -0
- data/test/functional/sample_experiment_files/prp_test.salsa.rosy.standalone.erb +120 -0
- data/test/functional/sample_experiment_files/prp_train.salsa.erb +138 -0
- data/test/functional/sample_experiment_files/prp_train.salsa.fred.standalone.erb +138 -0
- data/test/functional/sample_experiment_files/prp_train.salsa.rosy.standalone.erb +138 -0
- data/test/functional/sample_experiment_files/rosy_test.salsa.erb +257 -0
- data/test/functional/sample_experiment_files/rosy_train.salsa.erb +259 -0
- data/test/functional/test_fred.rb +47 -0
- data/test/functional/test_frprep.rb +52 -0
- data/test/functional/test_rosy.rb +20 -0
- metadata +270 -0
@@ -0,0 +1,60 @@
|
|
1
|
+
# Mixin that determines, from a view over annotated instances, the most
# frequent subcategorisation pattern (the sorted, de-duplicated set of
# grammatical functions seen in a sentence) per target and per frame.
module TargetsMostFrequentSc
  # Counts, sentence by sentence, how often each subcat pattern occurs for
  # each target key and for each frame, and keeps the most frequent one.
  #
  # view:: object whose each_sentence yields an array of instance hashes
  #        (the keys "frame", "target", "target_pos" and "fn_gf" are read)
  # noval:: "fn_gf" value that marks "no grammatical function"; such
  #         instances, and those whose "fn_gf" is "target", are ignored
  # with_frame_default:: if true-ish, the per-frame result is returned too
  #
  # returns: hash target key => subcat string (GFs joined by "_"), or, when
  # with_frame_default is set, the pair
  # [hash target key => subcat, hash frame => subcat]
  def determine_target_most_frequent_sc(view, noval, with_frame_default = nil)
    counts_by_target = {}
    counts_by_frame = {}

    view.each_sentence do |sentence|
      # the first instance of a sentence supplies target key and frame
      head = sentence.first
      target_key = tmf_target_key(head)
      frame_name = head["frame"]

      # sentence-wide subcat pattern: the distinct GFs, sorted
      gfs = sentence.map { |inst| inst["fn_gf"] }
      gfs = gfs.reject { |gf| gf == noval || gf == "target" }
      pattern = gfs.sort.uniq.join("_")

      # count the pattern once for the target and once for the frame
      (counts_by_target[target_key] ||= Hash.new(0))[pattern] += 1
      (counts_by_frame[frame_name] ||= Hash.new(0))[pattern] += 1
    end

    # for every key keep the pattern with the highest count
    pick_winners = lambda do |counts_by_key|
      winners = {}
      counts_by_key.each do |key, pattern_counts|
        winners[key] = pattern_counts.max_by { |_pattern, count| count }.first
      end
      winners
    end

    by_target = pick_winners.call(counts_by_target)
    by_frame = pick_winners.call(counts_by_frame)

    with_frame_default ? [by_target, by_frame] : by_target
  end

  # Builds the key that identifies a target: "<frame>.<target>.<target_pos>",
  # read from the given instance hash.
  def tmf_target_key(instance)
    instance["frame"] + "." + instance["target"] + "." + instance["target_pos"]
  end
end
|
data/lib/rosy/View.rb
ADDED
@@ -0,0 +1,418 @@
|
|
1
|
+
# class DBView
|
2
|
+
# KE, SP 27.1.05
|
3
|
+
#
|
4
|
+
# builds on class DBTable, which offers access to a database table
|
5
|
+
# extract views of the table (select columns, select rows)
|
6
|
+
# and offers access methods for these views.
|
7
|
+
# Rows of the table can be returned either as hashes or as arrays.
|
8
|
+
#
|
9
|
+
# There is a special column of the table (the name of which we get in the new() method),
|
10
|
+
# the gold column.
|
11
|
+
# It can be returned directly, or modified by some "dynamic feature object",
|
12
|
+
# and its value (modified or unmodified) will always be last in the array representation of a row.
|
13
|
+
|
14
|
+
require "common/SQLQuery"
|
15
|
+
require "common/ruby_class_extensions"
|
16
|
+
require "common/RosyConventions"
|
17
|
+
|
18
|
+
class DBView

  ################
  # new
  #
  # prepare a view.
  # given a list of DB tables to access, each with its
  # set of features to be returned in the view,
  # a set of value restrictions,
  # the name of the gold feature,
  # and a list of objects that manipulate the gold feature into alternate
  # gold features.
  #
  # value_restrictions restricts the view to those rows for which the value restrictions hold,
  # e.g. only those rows where frame = Bla, or only those rows where partofspeech = Blupp
  #
  # The view remembers the indices of the _first_ table in the list of tables
  # it is given.
  #
  # A standard dynamic ID can be given: DynGold objects all have an id() method,
  # which returns a string, by which the use of the object can be requested
  # of the view. If no dynamic ID is given below in methods each_array,
  # each_hash, each_sentence, the system falls back to the standard dynamic ID.
  # If none is given here, the standard DynGold object is the one that doesn't
  # change the gold column. If one is given here, it will be used by default
  # when no ID is given in each_hash, each_array, each_sentence
  #
  # The last parameter is a hash with the following optional entries:
  # "gold":
  #   string: name of the gold feature
  #   If you want the gold feature to be mapped using a DynGold object,
  #   you need to specify this parameter -- and you need to include
  #   the gold feature in some feature_list.
  #   Warning: if a feature of this name appears in several of the
  #   feature lists, only the first one is mapped
  # "dynamic_feature_list":
  #   array:DynGold objects, list of objects that map the gold feature
  #   to a different feature value (e.g. to "FE", "NONE")
  #   DynGold objects have one method make: string -> string
  #   that maps one gold feature,
  #   and one method id: -> string that gives an ID unique to this DynGold class
  #   and by which this DynGold class can be chosen.
  # "standard_dyngold_id":
  #   string: standard DynGold object ID (see above)
  # "sentence_id_feature":
  #   string: feature name for the sentence ID column, needed for each_sentence()
  #
  # further parameters that are passed on to SQLQuery.select: see there

  def initialize(table_col_pairs,    # array:SelectTableAndColumns objects
                 value_restrictions, # array:ValueRestriction objects
                 db_obj,             # MySql object (from mysql.rb) that already has access to the correct database
                 parameters = {})    # hash with further parameters: see above

    @db_obj = db_obj
    @table_col_pairs = table_col_pairs
    @parameters = parameters

    # defaults, so that an empty view (early return below) still has
    # well-defined state for length(), close() and the iterators
    @index_tables = Hash.new
    @map_gold = false

    # view empty? (no tables at all, or every table has an empty column list)
    if @table_col_pairs.empty? or
        @table_col_pairs.big_and { |tc| tc.columns.is_a?(Array) and tc.columns.empty? }
      @view_empty = true
      return
    else
      @view_empty = false
    end

    # okay, we can make the view, it contains at least one table and
    # at least one column:
    # do one view for all columns requested, and one for the indices of each table
    #
    # @main_table is a DBResult object
    @main_table = execute_command(SQLQuery.select(@table_col_pairs,
                                                  value_restrictions, parameters))

    # index_tables: Hash: table name => DBResult object
    table_col_pairs.each_with_index { |tc, index|
      # read index column of this table, add all the other tables
      # with empty column lists
      index_table_col_pairs = @table_col_pairs.map_with_index { |other_tc, other_index|
        if other_index == index
          # the current table
          SelectTableAndColumns.new(tc.table_obj,
                                    [tc.table_obj.index_name])
        else
          # other table: keep just the table, not the columns
          SelectTableAndColumns.new(other_tc.table_obj, nil)
        end
      }
      @index_tables[tc.table_obj.table_name] = execute_command(SQLQuery.select(index_table_col_pairs,
                                                                               value_restrictions, parameters))
    }

    # map gold to something else?
    # yes, if parameters[gold] has been set
    if @parameters["gold"]
      @map_gold = true
      # remember which column in the DB table is the gold column
      # (nil if the gold column is not part of this view)
      @gold_index = column_names().index(@parameters["gold"])
    else
      @map_gold = false
    end
  end

  ################
  # close
  #
  # to be called when the view is no longer needed:
  # frees the DBResult objects underlying this view
  def close()
    unless @view_empty
      @main_table.free()
      @index_tables.each_value { |t| t.free() }
    end
  end

  ################
  # write_to_file
  #
  # writes instances to a file
  # each instance given as a comma-separated list of features
  # The features are the ones given in my_feature_list
  # (parameter to the new() method) above, in that order,
  # plus (dynamic) gold, which is last.
  #
  # guarantees that comma is used only to separate features -- but no other
  # changes in the feature values
  def write_to_file(file,             # stream to write to
                    dyn_gold_id=nil)  # string: ID of a DynGold object from the dynamic_feature_list.
                                      # if nil, main gold is used

    each_instance_s(dyn_gold_id) { |instance_string|
      file.puts instance_string
    }
  end


  ################
  # each_instance_s
  #
  # yields each instance as a string:
  # a comma-separated list of features
  # The features are the ones given in my_feature_list
  # (parameter to the new() method) above, in that order,
  # plus (dynamic) gold, which is last.
  #
  # guarantees that comma is used only to separate features -- but no other
  # changes in the feature values
  def each_instance_s(dyn_gold_id=nil) # string: ID of a DynGold object from the dynamic_feature_list.
                                       # if nil, main gold is used
    each_array(dyn_gold_id) { |array|
      # commas inside a value are spelled out so that "," stays a pure separator
      yield array.map { |entry| entry.to_s.gsub(/,/, "COMMA") }.join(",")
    }
  end

  ################
  # each_hash
  #
  # iterates over hashes representing rows
  # in each row, there is a gold key/value pair
  # specified by the optional argument dyn_gold_id.
  # which is the string ID of a DynGold object
  # from the dynamic_feature_list.
  # If arg is not present, main gold is used
  #
  # The mapped gold value is stored under the name of the gold column
  # (parameter "gold"), replacing the original value.
  #
  # yields: hashes column_name -> column_value
  # An empty view yields nothing.
  def each_hash(dyn_gold_id=nil) # string: ID of a DynGold object from the dynamic_feature_list, or nil
    if @view_empty
      return
    end
    if @map_gold
      dyn_gold_obj = fetch_dyn_gold_obj(dyn_gold_id)
    end
    @main_table.reset()

    @main_table.each_hash { |row_hash|
      if @map_gold
        row_hash[@parameters["gold"]] = dyn_gold_obj.make(row_hash[@parameters["gold"]])
      end

      yield row_hash
    }
  end

  ################
  # each_array
  #
  # iterates over arrays representing rows
  # the last item of each row is the gold column
  # selected by the optional argument dyn_gold_id.
  # which is the string ID of a DynGold object
  # from the dynamic_feature_list.
  # If arg is not present, main gold is used
  #
  # yields: arrays of column values,
  # values are in the order of my_feature_list given
  # to the new() method, (dynamic) gold is last
  # An empty view yields nothing.
  def each_array(dyn_gold_id=nil) # string: ID of a DynGold object from the dynamic_feature_list, or nil

    if @view_empty
      return
    end
    if @map_gold
      dyn_gold_obj = fetch_dyn_gold_obj(dyn_gold_id)
    end
    @main_table.reset()

    @main_table.each { |row|
      if @gold_index
        # move the gold value from its column position to the end of the row
        gold = row.delete_at(@gold_index)
        row.push(@map_gold ? dyn_gold_obj.make(gold) : gold)
      end

      yield row
    }
  end

  ################
  # update_column
  #
  # update a column for all rows of this view
  #
  # Given a column name to be updated, and a list of value tuples,
  # update each row of the view, or rather the appropriate column of each row of the view,
  # with values for that row.
  #
  # the list has the same length as the view, as there must be a value tuple
  # for each row of the view.
  #
  # Raises a RuntimeError for an empty view. Terminates the process
  # (exit 1) if the number of values does not match the number of rows,
  # or if no table of this view has a column of the given name.
  #
  # returns: nothing
  def update_column(name,   # string: column name
                    values) # array of Objects

    if @view_empty
      raise "Cannot update empty view"
    end

    # find the first table in @table_col_pairs that has
    # a column with this name
    # and update that column
    @table_col_pairs.each { |tc|
      if (tc.columns.is_a?(Array) and tc.columns.include? name) or
          (tc.columns == "*" and tc.table_obj.list_column_names().include? name)

        table_name = tc.table_obj.table_name
        index_table = @index_tables[table_name]

        # sanity check: number of update entries must match
        # number of entries in this view
        unless values.length() == index_table.num_rows()
          $stderr.puts "Error: length of value array (#{values.length}) is not equal to length of view (#{index_table.num_rows})!"
          exit 1
        end

        index_table.reset()

        # the i-th value goes to the row whose index is the i-th entry
        # of this table's index result
        values.each { |value|
          index = index_table.fetch_row().first
          tc.table_obj.update_row(index, [[name, value]])
        }

        return
      end
    }

    # no match found
    $stderr.puts "View.rb Error: cannot update a column that is not in this view: #{name}"
    exit 1
  end


  ################
  # each_sentence
  #
  # like each_hash, but it groups the row hashes sentence-wise
  # sentence boundaries in the view are detected by the change in a
  # special column describing sentence IDs
  #
  # also needs a dyngold object id
  #
  # Raises a RuntimeError if the "sentence_id_feature" parameter is missing
  # or if that column is not part of the view.
  #
  # yields: an array of hashes column_name -> column_value
  def each_sentence(dyn_gold_id = nil) # string: ID of a DynGold object from the dynamic_feature_list, or nil

    # sanity check 1: need to know what the sentence ID is
    unless @parameters["sentence_id_feature"]
      raise "I need the name of the sentence ID feature for each_sentence()"
    end
    # sanity check 2: the view needs to include the sentence ID
    unless column_names().include? @parameters["sentence_id_feature"]
      raise "View.each_sentence: Cannot do this without sentence ID in the view"
    end

    last_sent_id = nil
    sentence = Array.new
    each_hash(dyn_gold_id) { |row_hash|
      # sentence ID changed (and this is not the very first row):
      # hand out the sentence collected so far
      if last_sent_id != row_hash[@parameters["sentence_id_feature"]] and
          (!(last_sent_id.nil?))
        yield sentence
        sentence = Array.new
      end
      last_sent_id = row_hash[@parameters["sentence_id_feature"]]
      sentence << row_hash
    }
    # the last sentence has no following ID change, so flush it here
    unless sentence.empty?
      yield sentence
    end
  end

  ######################
  # length
  #
  # returns the length of the view: the number of its rows
  # (taken from the index result of the first table).
  # An empty view has length 0.
  def length()
    # an empty view never ran any query, so there is no index result to ask
    return 0 if @view_empty

    return @index_tables[@table_col_pairs.first.table_obj.table_name].num_rows
  end

  ###
  private

  ######
  # IdentityGold
  #
  # stand-in for a DynGold object that leaves the gold value unchanged.
  # Used when no (known) dynamic gold ID has been requested.
  class IdentityGold
    # returns its argument unchanged
    def make(x)
      x
    end

    # ID under which the unchanged gold is known
    def id()
      return "gold"
    end
  end

  ################
  # column_names
  #
  # returns: array:string
  # the list of column names for this view
  # in the right order
  # (empty array for an empty view)
  def column_names()
    if @view_empty
      return []
    else
      return @main_table.list_column_names()
    end
  end

  ######
  # fetch_dyn_gold_obj
  #
  # given an ID of a gold object, look for the DynGold object
  # with this ID in the dynamic_feature_list and return it
  # If the ID is nil, use the standard dynamic gold ID that
  # has been set in the new() method.
  # If that is nil too, take the non-modified gold as a
  # default: return a dummy object with a make() method
  # that just returns its parameter.
  # An unknown ID is reported on stderr and also falls back to the
  # non-modified gold.
  #
  # Raises a RuntimeError if an ID is requested but no
  # "dynamic_feature_list" parameter was given.
  #
  # returns: object offering a make() method

  def fetch_dyn_gold_obj(dyn_gold_id) # string or nil
    # find a DynGold object that will transform the gold column
    if dyn_gold_id.nil?
      dyn_gold_id = @parameters["standard_dyngold_id"]
    end

    if dyn_gold_id
      unless @parameters["dynamic_feature_list"]
        raise "No dynamic features given"
      end

      dyn_gold_obj = @parameters["dynamic_feature_list"].detect { |obj|
        obj.id() == dyn_gold_id
      }
      return dyn_gold_obj unless dyn_gold_obj.nil?

      $stderr.puts "View.rb: Unknown DynGold ID #{dyn_gold_id}"
      $stderr.puts "Using unchanged gold"
    end

    # no usable dynamic gold ID: use unchanged gold by default.
    # A dedicated object is returned rather than adding singleton methods
    # to whatever value is at hand: doing that to nil would define
    # make/id on NilClass for the whole program.
    return IdentityGold.new
  end

  ######
  # execute_command
  #
  # runs an SQL command on the database object of this view.
  # On failure, the command and the error details are written to stderr
  # and the original exception is raised again.
  #
  # returns: whatever @db_obj.query returns
  def execute_command(command) # string: SQL command
    begin
      return @db_obj.query(command)
    rescue StandardError => e
      # The MySQL error class is deliberately not named here: if the
      # constant were undefined, evaluating the rescue clause would raise
      # a NameError that hides the actual query failure.
      $stderr.puts "Error executing SQL query. Command was:\n#{command}"
      # errno/error are reported when the exception offers them
      $stderr.puts "Error code: #{e.errno}" if e.respond_to?(:errno)
      $stderr.puts "Error message: #{e.respond_to?(:error) ? e.error : e.message}"
      raise
    end
  end

end
|