frprep 0.0.1.prealpha
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.yardopts +8 -0
- data/CHANGELOG.rdoc +0 -0
- data/LICENSE.rdoc +0 -0
- data/README.rdoc +0 -0
- data/lib/common/AbstractSynInterface.rb +1227 -0
- data/lib/common/BerkeleyInterface.rb +375 -0
- data/lib/common/CollinsInterface.rb +1165 -0
- data/lib/common/ConfigData.rb +694 -0
- data/lib/common/Counter.rb +18 -0
- data/lib/common/DBInterface.rb +48 -0
- data/lib/common/EnduserMode.rb +27 -0
- data/lib/common/Eval.rb +480 -0
- data/lib/common/FixSynSemMapping.rb +196 -0
- data/lib/common/FrPrepConfigData.rb +66 -0
- data/lib/common/FrprepHelper.rb +1324 -0
- data/lib/common/Graph.rb +345 -0
- data/lib/common/ISO-8859-1.rb +24 -0
- data/lib/common/ML.rb +186 -0
- data/lib/common/Maxent.rb +215 -0
- data/lib/common/MiniparInterface.rb +1388 -0
- data/lib/common/Optimise.rb +195 -0
- data/lib/common/Parser.rb +213 -0
- data/lib/common/RegXML.rb +269 -0
- data/lib/common/RosyConventions.rb +171 -0
- data/lib/common/SQLQuery.rb +243 -0
- data/lib/common/STXmlTerminalOrder.rb +194 -0
- data/lib/common/SalsaTigerRegXML.rb +2347 -0
- data/lib/common/SalsaTigerXMLHelper.rb +99 -0
- data/lib/common/SleepyInterface.rb +384 -0
- data/lib/common/SynInterfaces.rb +275 -0
- data/lib/common/TabFormat.rb +720 -0
- data/lib/common/Tiger.rb +1448 -0
- data/lib/common/TntInterface.rb +44 -0
- data/lib/common/Tree.rb +61 -0
- data/lib/common/TreetaggerInterface.rb +303 -0
- data/lib/common/headz.rb +338 -0
- data/lib/common/option_parser.rb +13 -0
- data/lib/common/ruby_class_extensions.rb +310 -0
- data/lib/fred/Baseline.rb +150 -0
- data/lib/fred/FileZipped.rb +31 -0
- data/lib/fred/FredBOWContext.rb +863 -0
- data/lib/fred/FredConfigData.rb +182 -0
- data/lib/fred/FredConventions.rb +232 -0
- data/lib/fred/FredDetermineTargets.rb +324 -0
- data/lib/fred/FredEval.rb +312 -0
- data/lib/fred/FredFeatureExtractors.rb +321 -0
- data/lib/fred/FredFeatures.rb +1061 -0
- data/lib/fred/FredFeaturize.rb +596 -0
- data/lib/fred/FredNumTrainingSenses.rb +27 -0
- data/lib/fred/FredParameters.rb +402 -0
- data/lib/fred/FredSplit.rb +84 -0
- data/lib/fred/FredSplitPkg.rb +180 -0
- data/lib/fred/FredTest.rb +607 -0
- data/lib/fred/FredTrain.rb +144 -0
- data/lib/fred/PlotAndREval.rb +480 -0
- data/lib/fred/fred.rb +45 -0
- data/lib/fred/md5.rb +23 -0
- data/lib/fred/opt_parser.rb +250 -0
- data/lib/frprep/AbstractSynInterface.rb +1227 -0
- data/lib/frprep/Ampersand.rb +37 -0
- data/lib/frprep/BerkeleyInterface.rb +375 -0
- data/lib/frprep/CollinsInterface.rb +1165 -0
- data/lib/frprep/ConfigData.rb +694 -0
- data/lib/frprep/Counter.rb +18 -0
- data/lib/frprep/FNCorpusXML.rb +643 -0
- data/lib/frprep/FNDatabase.rb +144 -0
- data/lib/frprep/FixSynSemMapping.rb +196 -0
- data/lib/frprep/FrPrepConfigData.rb +66 -0
- data/lib/frprep/FrameXML.rb +513 -0
- data/lib/frprep/FrprepHelper.rb +1324 -0
- data/lib/frprep/Graph.rb +345 -0
- data/lib/frprep/ISO-8859-1.rb +24 -0
- data/lib/frprep/MiniparInterface.rb +1388 -0
- data/lib/frprep/Parser.rb +213 -0
- data/lib/frprep/RegXML.rb +269 -0
- data/lib/frprep/STXmlTerminalOrder.rb +194 -0
- data/lib/frprep/SalsaTigerRegXML.rb +2347 -0
- data/lib/frprep/SalsaTigerXMLHelper.rb +99 -0
- data/lib/frprep/SleepyInterface.rb +384 -0
- data/lib/frprep/SynInterfaces.rb +275 -0
- data/lib/frprep/TabFormat.rb +720 -0
- data/lib/frprep/Tiger.rb +1448 -0
- data/lib/frprep/TntInterface.rb +44 -0
- data/lib/frprep/Tree.rb +61 -0
- data/lib/frprep/TreetaggerInterface.rb +303 -0
- data/lib/frprep/do_parses.rb +142 -0
- data/lib/frprep/frprep.rb +686 -0
- data/lib/frprep/headz.rb +338 -0
- data/lib/frprep/one_parsed_file.rb +28 -0
- data/lib/frprep/opt_parser.rb +94 -0
- data/lib/frprep/ruby_class_extensions.rb +310 -0
- data/lib/rosy/AbstractFeatureAndExternal.rb +240 -0
- data/lib/rosy/DBMySQL.rb +146 -0
- data/lib/rosy/DBSQLite.rb +280 -0
- data/lib/rosy/DBTable.rb +239 -0
- data/lib/rosy/DBWrapper.rb +176 -0
- data/lib/rosy/ExternalConfigData.rb +58 -0
- data/lib/rosy/FailedParses.rb +130 -0
- data/lib/rosy/FeatureInfo.rb +242 -0
- data/lib/rosy/GfInduce.rb +1115 -0
- data/lib/rosy/GfInduceFeature.rb +148 -0
- data/lib/rosy/InputData.rb +294 -0
- data/lib/rosy/RosyConfigData.rb +115 -0
- data/lib/rosy/RosyConfusability.rb +338 -0
- data/lib/rosy/RosyEval.rb +465 -0
- data/lib/rosy/RosyFeatureExtractors.rb +1609 -0
- data/lib/rosy/RosyFeaturize.rb +280 -0
- data/lib/rosy/RosyInspect.rb +336 -0
- data/lib/rosy/RosyIterator.rb +477 -0
- data/lib/rosy/RosyPhase2FeatureExtractors.rb +230 -0
- data/lib/rosy/RosyPruning.rb +165 -0
- data/lib/rosy/RosyServices.rb +744 -0
- data/lib/rosy/RosySplit.rb +232 -0
- data/lib/rosy/RosyTask.rb +19 -0
- data/lib/rosy/RosyTest.rb +826 -0
- data/lib/rosy/RosyTrain.rb +232 -0
- data/lib/rosy/RosyTrainingTestTable.rb +786 -0
- data/lib/rosy/TargetsMostFrequentFrame.rb +60 -0
- data/lib/rosy/View.rb +418 -0
- data/lib/rosy/opt_parser.rb +379 -0
- data/lib/rosy/rosy.rb +77 -0
- data/lib/shalmaneser/version.rb +3 -0
- data/test/frprep/test_opt_parser.rb +94 -0
- data/test/functional/functional_test_helper.rb +40 -0
- data/test/functional/sample_experiment_files/fred_test.salsa.erb +122 -0
- data/test/functional/sample_experiment_files/fred_train.salsa.erb +135 -0
- data/test/functional/sample_experiment_files/prp_test.salsa.erb +138 -0
- data/test/functional/sample_experiment_files/prp_test.salsa.fred.standalone.erb +120 -0
- data/test/functional/sample_experiment_files/prp_test.salsa.rosy.standalone.erb +120 -0
- data/test/functional/sample_experiment_files/prp_train.salsa.erb +138 -0
- data/test/functional/sample_experiment_files/prp_train.salsa.fred.standalone.erb +138 -0
- data/test/functional/sample_experiment_files/prp_train.salsa.rosy.standalone.erb +138 -0
- data/test/functional/sample_experiment_files/rosy_test.salsa.erb +257 -0
- data/test/functional/sample_experiment_files/rosy_train.salsa.erb +259 -0
- data/test/functional/test_fred.rb +47 -0
- data/test/functional/test_frprep.rb +52 -0
- data/test/functional/test_rosy.rb +20 -0
- metadata +270 -0
@@ -0,0 +1,477 @@
|
|
1
|
+
# RosyIterator
|
2
|
+
# KE May 2005
|
3
|
+
#
|
4
|
+
# RosyIterator is a class that
|
5
|
+
# * reads the "xwise" parameters in the experiment file to
|
6
|
+
# determine the portions in which data is to be fed to classifiers,
|
7
|
+
# and offers an iterator that iterates through every group to
|
8
|
+
# be trained/tested on
|
9
|
+
# * constructs views matching the given "xwise" group.
|
10
|
+
#
|
11
|
+
# RosyIterator incorporates the following services:
|
12
|
+
# - choosing the right DB table, depending on
|
13
|
+
# whether training/test data is being accessed,
|
14
|
+
# and with or without a splitlog
|
15
|
+
# - making and adding all currently available Dynamic Gold objects
|
16
|
+
# (i.e. objects that are capable of mapping the gold column to
|
17
|
+
# something else)
|
18
|
+
# - initializing a view, potentially modified depending on the assignment step:
|
19
|
+
# argrec -> use dynamic gold, mapping gold labels to "FE" or "NONE"
|
20
|
+
# arglab -> use only those rows that have "FE" assigned from the argrec step
|
21
|
+
#
|
22
|
+
# Setting "xwise": An "xwise" entry in the hash passed on to RosyIterator.new()
|
23
|
+
# overrides all other settings. If that isn't given, the "xwise_" + step
|
24
|
+
# (xwise_argrec, xwise_arglab, xwise_onestep) from the experiment file is read.
|
25
|
+
# If that hasn't been set either, the default is frame-wise.
|
26
|
+
|
27
|
+
require 'common/ruby_class_extensions'
|
28
|
+
|
29
|
+
require 'rosy/View'
|
30
|
+
require "common/RosyConventions"
|
31
|
+
require "rosy/RosyPruning"
|
32
|
+
require "rosy/RosySplit"
|
33
|
+
require "rosy/RosyTrainingTestTable"
|
34
|
+
|
35
|
+
class RosyIterator
|
36
|
+
|
37
|
+
###
|
38
|
+
# new
|
39
|
+
#
|
40
|
+
# open the correct database table,
|
41
|
+
# initialize Dynamic Gold objects
|
42
|
+
|
43
|
+
|
44
|
+
# Set up the iterator: choose the DB table to read from, register the
# dynamic gold objects, collect the value restrictions that apply to every
# view, and compute the "xwise" groups to iterate over.
#
# ttt_obj::  RosyTrainingTestTable object
# exp::      RosyConfigData object (experiment file)
# dataset::  string: "train" or "test"
# var_hash:: further arguments, keyed by string:
#            "step"    - "argrec"/"arglab"/"onestep", or nil (= no manipulation of the view)
#            "testID"  - ID of the test set, or nil
#            "splitID" - splitlog ID, or nil if no split is to be used
#            "xwise"   - feature names joined by spaces; overrides
#                        exp.get("xwise_" + step) if non-nil
#            "prune"   - if true and pruning is set in the experiment file,
#                        add a restriction that omits pruned instances
#                        (steps argrec and onestep only)
#
# Raises RuntimeError if the test table is requested without a test ID,
# if step is "arglab" and the dataset is neither "train" nor "test",
# if the argrec results needed for arglab are missing, or if an xwise
# entry is not among the feature names known to ttt_obj.
def initialize(ttt_obj, exp, dataset, var_hash = {})
  @exp = exp
  @dataset = dataset
  @ttt_obj = ttt_obj
  @splitID = var_hash["splitID"]
  @step = var_hash["step"]
  @testID = var_hash["testID"]

  # object variables we are going to use below
  @db_table = nil                   # DB table we are working on
  @allcolnames = nil                # names of all columns of first and potentially second table

  @dyn_gold_objects = nil           # list of dynamic gold-producing objects
  @standard_dyngold_id = nil        # ID of standard dyngold obj to use

  @standard_value_restrictions = [] # value restrictions to use with each view

  @second_table = nil               # read view from 2 tables? if so, DBTable object for 2nd table
  @use_cols_from_second_table = nil # array: names of columns from 2nd table
  @second_table_colprefix = nil     # string: prefix for columns from 2nd table

  @xwise = nil                      # array: read data one X at a time (forms groups)
  @groups = nil                     # distinct values for X from xwise
  @current_group = nil              # current group (will be set by iterator each_group)

  ##
  # open the right database table:
  # a split always works on the training table
  if @dataset == "train" || @splitID
    @db_table = @ttt_obj.existing_train_table
  else
    raise "cannot open the test table without test ID" unless @testID

    @db_table = @ttt_obj.existing_test_table(@testID)
  end
  @allcolnames = @db_table.list_column_names

  ##
  # make dynamic gold objects
  @dyn_gold_objects = [DynGoldBinary.new(@exp.get("noval"))]

  ###
  # what is the standard gold column to be returned?
  # argument recognition: distinguish just "FE", "NONE" as gold
  @standard_dyngold_id = "binary_gold" if @step == "argrec"

  ##
  # if splitID has been set,
  # make additional restrictions on the column values
  if @splitID
    # get split table
    @second_table = @ttt_obj.existing_split_table(@splitID, @dataset,
                                                  RosySplit.split_index_colname)

    # additional value restriction:
    # only use rows whose sentence ID also appears in the split table
    # (i.e. rows included in the split)
    @standard_value_restrictions << RosySplit.make_join_restriction(@splitID,
                                                                    @db_table,
                                                                    @dataset,
                                                                    @ttt_obj)

    # additional column names: those of the second table, without duplicates.
    # Array#| builds a new array, so the list handed out by
    # @db_table.list_column_names is left untouched, and the split table
    # opened above is reused instead of being looked up a second time.
    @allcolnames = @allcolnames | @second_table.list_column_names

    # if we're using a split, read the phase 2 features and the classification
    # results from the split table rather than from the main table:
    # @use_cols_from_second_table is a list of column names (strings)
    #   to take from the 2nd table
    # @second_table_colprefix is a string: all columns starting with this
    #   prefix are taken from the 2nd table
    @use_cols_from_second_table = [RosySplit.split_index_colname]
    @second_table_colprefix = @exp.get("classif_column_name")
  end

  ###
  # Any (row) value restrictions to be imposed
  # on all views we generate?
  if @step == "arglab"
    # argument labeling: use as input only those lines
    # for which argrec-label is "FE"

    if @exp.get("assume_argrec_perfect")
      # assume perfect argrec step:
      # take all rows where gold is not "noval"
      @standard_value_restrictions << ValueRestriction.new(@db_table.table_name + ".gold",
                                                           @exp.get("noval"),
                                                           "posneg" => "!=")
    else
      # use argrec step as is:
      # take all rows where the argrec result is "FE"
      run_column_name =
        case @dataset
        when "train"
          @ttt_obj.existing_runlog("argrec", "train", nil, @splitID)
        when "test"
          @ttt_obj.existing_runlog("argrec", "test", @testID, @splitID)
        else
          raise "Unknown dataset '#{@dataset}' for step arglab: expected 'train' or 'test'"
        end

      if run_column_name.nil?
        $stderr.puts "Missing: argrec classification results on #{@dataset} data."
        $stderr.puts "I have logs of the following runs: "
        $stderr.puts @ttt_obj.runlog_to_s()
        raise "Missing argrec classification results on #{@dataset} data"
      end

      # run column where? split table, or the table we are mainly working with?
      run_table = @second_table || @db_table
      run_column_name = run_table.table_name + "." + run_column_name

      @standard_value_restrictions << ValueRestriction.new(run_column_name, "FE")
    end
  end

  # pruning? Needs all three:
  # requested in var_hash, a step that does argument recognition,
  # and pruning switched on in the experiment file
  if var_hash["prune"] &&
     ["argrec", "onestep"].include?(@step) &&
     Pruning.prune?(@exp)
    @standard_value_restrictions << Pruning.restriction_removing_pruned(@exp)
  end

  ##
  # access "xwise" information:
  # the xwise value in var_hash overrides the experiment file, which is
  # only consulted if we know what training/test step we're in;
  # default: read one frame at a time
  @xwise = var_hash["xwise"]
  @xwise = @exp.get("xwise_" + @step) if !@xwise && @step
  @xwise = "frame" if @xwise.nil?

  # xwise is a string of feature names joined by spaces.
  # transform to an array by splitting at spaces
  @xwise = @xwise.split
  @xwise.each do |xwise_entry|
    # sanity check: valid xwise value?
    next if @ttt_obj.feature_names.include?(xwise_entry)

    raise "Unknown value for parameter 'xwise' in experiment file.\n" +
          "Allowed: any subset of the list of features listed in the experiment file.\n" +
          "This is the granularity of training and testing\n" +
          "What I got was: " + @xwise.join(" ")
  end

  # list all distinct value combinations of the xwise columns
  @groups = unique_values_of_columns(@xwise)
  @current_group = nil
end
|
223
|
+
|
224
|
+
####
|
225
|
+
# get_xwise_column_names
|
226
|
+
#
|
227
|
+
# get the column names used for determining the groups
|
228
|
+
#
|
229
|
+
# returns: an array of strings, ["frame"] or ["frame", "target"],
|
230
|
+
# or ["target_pos"]
|
231
|
+
# Column names that define the groups.
#
# returns: the array of strings stored in @xwise
def get_xwise_column_names
  @xwise
end
|
234
|
+
|
235
|
+
####
|
236
|
+
# num_groups
|
237
|
+
# returns: integer
|
238
|
+
# Number of groups held in @groups.
#
# returns: integer
def num_groups
  @groups.size
end
|
241
|
+
|
242
|
+
####
|
243
|
+
# each_group
|
244
|
+
#
|
245
|
+
# iterates through the "xwise" groups, sets
|
246
|
+
# internal values such that get_a_view_for_current_group()
|
247
|
+
# will get you the correct view
|
248
|
+
#
|
249
|
+
# yields: for each group, a pair of
|
250
|
+
# - the hash describing the group, as returned by unique_values_of_columns
|
251
|
+
# - plus an ID for the group, made up of its hash values concatenated into a string
|
252
|
+
# (values are connected by spaces)
|
253
|
+
# Iterate through the "xwise" groups. Before each yield, @current_group is
# set to the group being yielded, so that get_a_view_for_current_group()
# builds its view for that group.
#
# yields: for each group, a two-element array of
#   - the hash column_name(string) -> value(object) describing the group
#   - an ID for the group: its hash values joined by single spaces
#
# returns: @groups when called with a block; without a block, an
# Enumerator over the same pairs (previously this raised LocalJumpError
# on the first yield).
def each_group
  return enum_for(:each_group) unless block_given?

  @groups.each do |group|
    # this is the unique description of the current group
    @current_group = group
    yield [group, group.values.join(" ")]
  end
end
|
261
|
+
|
262
|
+
####
|
263
|
+
# get_a_view_for_current_group
|
264
|
+
#
|
265
|
+
# constructs a new View object
|
266
|
+
# matching the last yielded group (of each_group)
|
267
|
+
#
|
268
|
+
# you give it: the names of the columns to be included in the view
|
269
|
+
# (or "*" for all columns) and a list of value restrictions
|
270
|
+
# on the rows (ValueRestriction objects, equalities or inequalities
|
271
|
+
# column_name = value, columnb_name != value), which may be omitted
|
272
|
+
#
|
273
|
+
# returns: DBView object
|
274
|
+
# Build a view for the group stored in @current_group (set by each_group)
# by delegating to get_a_view_for_group.
#
# columns::            array of column names to include, or the string "*"
#                      for all columns
# value_restrictions:: array of ValueRestriction objects; may be omitted
#
# returns: whatever get_a_view_for_group returns
def get_a_view_for_current_group(columns, value_restrictions = [])
  get_a_view_for_group(@current_group, columns, value_restrictions)
end
|
280
|
+
|
281
|
+
####
|
282
|
+
# get_a_view_for_group
|
283
|
+
#
|
284
|
+
# constructs a new View object
|
285
|
+
# matching a group given by its row hash
|
286
|
+
# (as yielded by each_group)
|
287
|
+
#
|
288
|
+
# you give it: the group description hash,
|
289
|
+
# the names of the columns to be included in the view
|
290
|
+
# (or "*" for all columns) and a list of value restrictions
|
291
|
+
# on the rows (ValueRestriction objects, equalities or inequalities
|
292
|
+
# column_name = value, columnb_name != value), which may be omitted
|
293
|
+
#
|
294
|
+
# returns: DBView object
|
295
|
+
# Build a view restricted to the rows of one group.
#
# group::              hash column(string) -> value(object) describing the
#                      group, as yielded by each_group
# columns::            array of column names to include, or the string "*"
#                      for all columns
# value_restrictions:: array of ValueRestriction objects; nil counts as []
#
# For every column of the group an equality restriction column = value is
# appended to the given restrictions. Group columns that belong to the
# second table are addressed as "<second table name>.<column>".
#
# returns: whatever get_a_view returns
# raises: RuntimeError if group columns belong to a second table
#         but no second table is set
def get_a_view_for_group(group, columns, value_restrictions = [])
  caller_restrictions = value_restrictions.nil? ? [] : value_restrictions

  # group column names may belong to either the first or the second table
  main_cols, split_cols = separate_into_1st_and_2nd_table_cols(group.keys)

  group_restrictions = main_cols.map do |name|
    ValueRestriction.new(name, group[name])
  end

  if split_cols
    raise "Cannot use second table columns without second table" unless @second_table

    split_cols.each do |name|
      group_restrictions << ValueRestriction.new(@second_table.table_name + "." + name,
                                                 group[name],
                                                 "table_name_included" => true)
    end
  end

  # caller's restrictions first, then the ones pinning the group;
  # the caller's array is not modified
  get_a_view(columns, caller_restrictions + group_restrictions)
end
|
335
|
+
|
336
|
+
|
337
|
+
|
338
|
+
####
|
339
|
+
# get_a_view
|
340
|
+
#
|
341
|
+
# construct a new View object,
|
342
|
+
#
|
343
|
+
# you give it: the names of the columns to be included in the view
|
344
|
+
# (or "*" for all columns) and a list of value restrictions
|
345
|
+
# on the rows (ValueRestriction objects, equalities or inequalities
|
346
|
+
# column_name = value, columnb_name != value), which may be omitted
|
347
|
+
#
|
348
|
+
# returns: DBView object
|
349
|
+
# Build a view without any group restriction.
#
# columns::            array of column names, or the string "*" (all columns)
# value_restrictions:: array of ValueRestriction objects; [] or nil for
#                      no restrictions
#
# The view is requested with "gold" as gold column, "sentid" as sentence ID
# feature, and this iterator's dynamic gold objects and standard dyngold ID.
#
# returns: whatever get_a_view_aux returns
def get_a_view(columns, value_restrictions = [])
  view_options = {
    "gold" => "gold",
    "dynamic_feature_list" => @dyn_gold_objects,
    "standard_dyngold_id" => @standard_dyngold_id,
    "sentence_id_feature" => "sentid"
  }
  restrictions = value_restrictions.nil? ? [] : value_restrictions

  get_a_view_aux(columns, restrictions, view_options)
end
|
363
|
+
|
364
|
+
####
|
365
|
+
# unique_values_of_columns
|
366
|
+
#
|
367
|
+
# construct a new View object
|
368
|
+
# for the given column and
|
369
|
+
# get all unique values for it
|
370
|
+
#
|
371
|
+
# returns: a list of hashes, one for each unique set of values
|
372
|
+
# Collect the distinct value combinations of the given columns.
#
# columns:: array of strings, column names
#
# Opens a view over the columns with "distinct" => true, gathers every row
# hash it yields, and closes the view. The view is closed in an ensure
# clause so that it is released even if reading the rows raises.
#
# returns: a list of hashes, one for each row of the distinct view
def unique_values_of_columns(columns)
  view = get_a_view_aux(columns, [], "distinct" => true)

  begin
    rows = []
    view.each_hash { |row| rows << row }
    rows
  ensure
    view.close
  end
end
|
384
|
+
|
385
|
+
#############################################
|
386
|
+
private
|
387
|
+
|
388
|
+
###
|
389
|
+
# given a list of column names,
|
390
|
+
# separate them into first table and second table columns
|
391
|
+
#
|
392
|
+
# columns may be either an array of string (column names)
|
393
|
+
# or the string "*" for "all columns"
|
394
|
+
# Given a list of column names, separate them into first-table and
# second-table columns.
#
# columns:: array of strings (column names), or the string "*" for
#           "all columns"
#
# A column goes to the second table if its name starts with
# @second_table_colprefix or is listed in @use_cols_from_second_table.
# The prefix is matched literally: it is escaped before being used in the
# regular expression, so characters such as "." in the prefix no longer
# act as wildcards.
#
# returns: pair [first_columns, second_columns]. If neither
# @use_cols_from_second_table nor @second_table_colprefix is set, columns
# is passed through unchanged (including "*") and second_columns is nil.
def separate_into_1st_and_2nd_table_cols(columns)
  # no columns to take from a 2nd table
  return [columns, nil] unless @use_cols_from_second_table || @second_table_colprefix

  # we have simply been told to use all columns
  columns = @allcolnames if columns == "*"

  # built once, outside the per-column block
  prefix_pattern = @second_table_colprefix &&
                   /^#{Regexp.escape(@second_table_colprefix.to_s)}/

  # distribute comes from the project's Array extensions; the bin for which
  # the block is true is taken as the second-table columns
  second_columns, first_columns = columns.distribute { |colname|
    (prefix_pattern && colname =~ prefix_pattern) ||
      (@use_cols_from_second_table && @use_cols_from_second_table.include?(colname))
  }

  [first_columns, second_columns]
end
|
419
|
+
|
420
|
+
###
|
421
|
+
# access DB table:
|
422
|
+
# figure out which table, set of columns from that table,
|
423
|
+
# set of columns from secondary table
|
424
|
+
#
|
425
|
+
# columns: either array of strings or "*"
|
426
|
+
#
|
427
|
+
# Create the DBView for a set of columns.
#
# columns::            array of strings, or "*"
# value_restrictions:: array of restrictions for this view; the iterator's
#                      standard restrictions are appended to them
# var_hash::           hash handed on unchanged to DBView.new
#
# The columns are split into main-table and second-table columns. The main
# table is always selected from; the second table only if one is set.
#
# returns: the new DBView object
def get_a_view_aux(columns, value_restrictions, var_hash)
  # distinguish main table and split table columns
  main_cols, split_cols = separate_into_1st_and_2nd_table_cols(columns)

  # pairs of a DB table and the columns from that table
  selections = [SelectTableAndColumns.new(@db_table, main_cols)]
  selections << SelectTableAndColumns.new(@second_table, split_cols) if @second_table

  all_restrictions = value_restrictions + @standard_value_restrictions

  DBView.new(selections, all_restrictions, @ttt_obj.database, var_hash)
end
|
447
|
+
|
448
|
+
end
|
449
|
+
|
450
|
+
|
451
|
+
###############
|
452
|
+
# class DynGoldBinary
|
453
|
+
#
|
454
|
+
# dynamic gold class:
|
455
|
+
# maps all FEs to "FE", and
|
456
|
+
# maps @noval to @noval.
|
457
|
+
#
|
458
|
+
# ID to hand to View in each_hash/each_array/each_sentence if you want
|
459
|
+
# to use this dynamic gold class:
|
460
|
+
# "binary_gold"
|
461
|
+
# Dynamic gold class that collapses gold labels to two values:
# the "no value" label stays as it is, every other label becomes "FE".
class DynGoldBinary
  # noval:: the label that stands for "no value"
  def initialize(noval)
    @noval = noval
  end

  # Map one gold label to its binary counterpart.
  #
  # returns: @noval if gold equals @noval, "FE" otherwise
  def make(gold)
    gold == @noval ? @noval : "FE"
  end

  # returns: the ID string of this dynamic gold class, "binary_gold"
  def id
    "binary_gold"
  end
end
|