shalmaneser 1.2.0.rc4 → 1.2.rc5
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +47 -18
- data/bin/shalmaneser +8 -2
- data/doc/index.md +1 -0
- data/lib/shalmaneser/opt_parser.rb +68 -67
- metadata +49 -119
- data/bin/fred +0 -16
- data/bin/frprep +0 -34
- data/bin/rosy +0 -17
- data/lib/common/AbstractSynInterface.rb +0 -1229
- data/lib/common/Counter.rb +0 -18
- data/lib/common/EnduserMode.rb +0 -27
- data/lib/common/Eval.rb +0 -480
- data/lib/common/FixSynSemMapping.rb +0 -196
- data/lib/common/Graph.rb +0 -345
- data/lib/common/ISO-8859-1.rb +0 -24
- data/lib/common/ML.rb +0 -186
- data/lib/common/Mallet.rb +0 -236
- data/lib/common/Maxent.rb +0 -229
- data/lib/common/Optimise.rb +0 -195
- data/lib/common/Parser.rb +0 -213
- data/lib/common/RegXML.rb +0 -269
- data/lib/common/RosyConventions.rb +0 -171
- data/lib/common/STXmlTerminalOrder.rb +0 -194
- data/lib/common/SalsaTigerRegXML.rb +0 -2347
- data/lib/common/SalsaTigerXMLHelper.rb +0 -99
- data/lib/common/SynInterfaces.rb +0 -282
- data/lib/common/TabFormat.rb +0 -721
- data/lib/common/Tiger.rb +0 -1448
- data/lib/common/Timbl.rb +0 -144
- data/lib/common/Tree.rb +0 -61
- data/lib/common/config_data.rb +0 -470
- data/lib/common/config_format_element.rb +0 -220
- data/lib/common/headz.rb +0 -338
- data/lib/common/option_parser.rb +0 -13
- data/lib/common/prep_config_data.rb +0 -62
- data/lib/common/prep_helper.rb +0 -1330
- data/lib/common/ruby_class_extensions.rb +0 -310
- data/lib/db/db_interface.rb +0 -48
- data/lib/db/db_mysql.rb +0 -145
- data/lib/db/db_sqlite.rb +0 -280
- data/lib/db/db_table.rb +0 -239
- data/lib/db/db_wrapper.rb +0 -176
- data/lib/db/sql_query.rb +0 -243
- data/lib/ext/maxent/Classify.class +0 -0
- data/lib/ext/maxent/Train.class +0 -0
- data/lib/fred/Baseline.rb +0 -150
- data/lib/fred/FileZipped.rb +0 -31
- data/lib/fred/FredBOWContext.rb +0 -877
- data/lib/fred/FredConventions.rb +0 -232
- data/lib/fred/FredDetermineTargets.rb +0 -319
- data/lib/fred/FredEval.rb +0 -312
- data/lib/fred/FredFeatureExtractors.rb +0 -322
- data/lib/fred/FredFeatures.rb +0 -1061
- data/lib/fred/FredFeaturize.rb +0 -602
- data/lib/fred/FredNumTrainingSenses.rb +0 -27
- data/lib/fred/FredParameters.rb +0 -402
- data/lib/fred/FredSplit.rb +0 -84
- data/lib/fred/FredSplitPkg.rb +0 -180
- data/lib/fred/FredTest.rb +0 -606
- data/lib/fred/FredTrain.rb +0 -144
- data/lib/fred/PlotAndREval.rb +0 -480
- data/lib/fred/fred.rb +0 -47
- data/lib/fred/fred_config_data.rb +0 -185
- data/lib/fred/md5.rb +0 -23
- data/lib/fred/opt_parser.rb +0 -250
- data/lib/frprep/Ampersand.rb +0 -39
- data/lib/frprep/CollinsInterface.rb +0 -1165
- data/lib/frprep/Counter.rb +0 -18
- data/lib/frprep/FNCorpusXML.rb +0 -643
- data/lib/frprep/FNDatabase.rb +0 -144
- data/lib/frprep/FrameXML.rb +0 -513
- data/lib/frprep/Graph.rb +0 -345
- data/lib/frprep/MiniparInterface.rb +0 -1388
- data/lib/frprep/RegXML.rb +0 -269
- data/lib/frprep/STXmlTerminalOrder.rb +0 -194
- data/lib/frprep/SleepyInterface.rb +0 -384
- data/lib/frprep/TntInterface.rb +0 -44
- data/lib/frprep/TreetaggerInterface.rb +0 -327
- data/lib/frprep/do_parses.rb +0 -143
- data/lib/frprep/frprep.rb +0 -693
- data/lib/frprep/interfaces/berkeley_interface.rb +0 -372
- data/lib/frprep/interfaces/stanford_interface.rb +0 -353
- data/lib/frprep/interpreters/berkeley_interpreter.rb +0 -22
- data/lib/frprep/interpreters/stanford_interpreter.rb +0 -22
- data/lib/frprep/one_parsed_file.rb +0 -28
- data/lib/frprep/opt_parser.rb +0 -94
- data/lib/frprep/ruby_class_extensions.rb +0 -310
- data/lib/rosy/AbstractFeatureAndExternal.rb +0 -242
- data/lib/rosy/ExternalConfigData.rb +0 -58
- data/lib/rosy/FailedParses.rb +0 -130
- data/lib/rosy/FeatureInfo.rb +0 -242
- data/lib/rosy/GfInduce.rb +0 -1115
- data/lib/rosy/GfInduceFeature.rb +0 -148
- data/lib/rosy/InputData.rb +0 -294
- data/lib/rosy/RosyConfusability.rb +0 -338
- data/lib/rosy/RosyEval.rb +0 -465
- data/lib/rosy/RosyFeatureExtractors.rb +0 -1609
- data/lib/rosy/RosyFeaturize.rb +0 -281
- data/lib/rosy/RosyInspect.rb +0 -336
- data/lib/rosy/RosyIterator.rb +0 -478
- data/lib/rosy/RosyPhase2FeatureExtractors.rb +0 -230
- data/lib/rosy/RosyPruning.rb +0 -165
- data/lib/rosy/RosyServices.rb +0 -744
- data/lib/rosy/RosySplit.rb +0 -232
- data/lib/rosy/RosyTask.rb +0 -19
- data/lib/rosy/RosyTest.rb +0 -829
- data/lib/rosy/RosyTrain.rb +0 -234
- data/lib/rosy/RosyTrainingTestTable.rb +0 -787
- data/lib/rosy/TargetsMostFrequentFrame.rb +0 -60
- data/lib/rosy/View.rb +0 -418
- data/lib/rosy/opt_parser.rb +0 -379
- data/lib/rosy/rosy.rb +0 -78
- data/lib/rosy/rosy_config_data.rb +0 -121
- data/lib/shalmaneser/version.rb +0 -3
@@ -1,242 +0,0 @@
|
|
1
|
-
# Katrin Erk November 05
|
2
|
-
#
|
3
|
-
# Abstract classes for
|
4
|
-
# - Rosy features
|
5
|
-
# - Rosy interface for external knowledge sources.
|
6
|
-
|
7
|
-
require 'rosy/ExternalConfigData'
|
8
|
-
|
9
|
-
####
|
10
|
-
# Feature Extractor:
|
11
|
-
# computes one or more features for a node (a SynNode object) out of
|
12
|
-
# a SalsaTigerSentence
|
13
|
-
# Abstract base class for Rosy feature extractors.
#
# An extractor computes one or more features for a node (a SynNode object)
# out of a SalsaTigerSentence. Concrete extractors overwrite the class-level
# description methods (designator, feature_names, sql_type, feature_type,
# phase) and one of the instance-level compute methods.
class AbstractFeatureExtractor
  # NOTE: these are class variables and therefore shared by this class and
  # all of its subclasses.
  @@sent = nil              # SalsaTigerSentence: sentence of the current instance
  @@frame = nil             # FrameNode: frame of the current instance
  @@node = nil              # SynNode: constituent that is the current instance
  @@interpreter_class = nil # SynInterpreter class
  @@instance_ok = true

  ###
  # returns a string: the designator for this feature extractor
  # (an extractor may compute several features, but
  # in the experiment file it is chosen by a single designator)
  def self.designator
    raise "Overwrite me"
  end

  ###
  # returns an array of feature names, the names of the
  # features that it can compute.
  # The number of features that the extractor computes must be fixed.
  def self.feature_names
    raise "Overwrite me."
  end

  ###
  # returns a string: the data type for the feature
  # to be passed on to the MySQL database,
  # e.g. VARCHAR(10), INT
  def self.sql_type
    raise "Overwrite me"
  end

  ###
  # returns a string: the feature type
  # (the same for all features computed by this extractor)
  # possible values:
  # - gold: gold label
  # - admin: administrative feature, do not pass this on to the learner
  # - syn: feature computed from syntactic characteristics of the instance
  # - sem: feature involving semantic characteristics of the instance
  # - sentlevel: this feature is the same for all instances of a sentence
  def self.feature_type
    raise "Overwrite me"
  end

  ###
  # returns a string: "phase 1" or "phase 2",
  # depending on whether the feature is computed
  # directly from the SalsaTigerSentence and the SynNode objects
  # or whether it is computed from the phase 1 features
  def self.phase
    raise "Overwrite me."
  end

  ###
  # returns an array of strings, providing information about
  # the feature extractor (empty by default)
  def self.info
    []
  end

  ###
  # Remember the sentence and frame of the current instance. This is done
  # prior to feature computation so that computations that stay the same
  # for several features can be done in advance.
  #
  # This is just relevant for Phase 1.
  #
  # @param sent  SalsaTigerSentence object
  # @param frame FrameNode object
  # @return true (subclasses return false/nil if there was a problem)
  def self.set_sentence(sent, frame)
    @@sent = sent
    @@frame = frame

    true
  end

  ###
  # Remember the node of the current instance.
  #
  # @param node SynNode of the sentence set in set_sentence
  # @return true
  def self.set_node(node)
    @@node = node

    true
  end

  ###
  # Hook for general settings, called prior to feature computation.
  # There are no settings at this point, so the argument is ignored.
  #
  # @return true
  def self.set(var_hash = {})
    true
  end

  ###
  # Test during initialisation whether a feature is computable.
  # Gives the feature the possibility to specify additional constraints,
  # e.g. for phase 2 features: which extractors from phase 1 are presupposed.
  # The default implementation accepts any extractor list.
  #
  # @return true
  def self.is_computable(extractor_list)
    true
  end

  ###
  # Register this extractor class with RosyFeatureInfo, if that collector
  # class has been loaded; otherwise do nothing.
  #
  # This is a singleton method, so visibility keywords such as `protected`
  # do not apply to it: it is public.
  def self.announce_me
    return unless Object.const_defined?(:RosyFeatureInfo)

    # Pass the class object itself; going through eval(name) is unnecessary
    # and fails for classes without a name.
    RosyFeatureInfo.add_feature(self)
  end

  ###
  # @param exp [ConfigData] Experiment file information
  # @param interpreter_class [Class]
  def initialize(exp, interpreter_class)
    @exp = exp
    @@interpreter_class = interpreter_class
  end

  ###
  # compute: compute features
  #
  # returns an array of features (strings), length the same as the
  # length of feature_names()
  def compute_features
    raise "overwrite me"
  end

  ###
  # phase 2 extractors:
  # compute features for a complete view (a DBView object)
  #
  # returns: an array of columns,
  # where a column is an array of feature values.
  # returns one column per entry in feature_names()
  def compute_features_on_view(view)
    raise "overwrite me"
  end

  # At this place, we had abstract methods for "training" phase 2 features.
  # Since this involves introducing a "state" that is nontrivial to preserve
  # for a standalone version of the classifiers, without keeping the training
  # data, we decided to remove this functionality (30.11.05).
  # Features which rely on learning patterns from the training data and
  # applying them to the test data will from now on be implemented as
  # externals.
end
|
163
|
-
|
164
|
-
################################################################
|
165
|
-
# Wrapper class for extractors that compute a single feature
|
166
|
-
# Wrapper class for extractors that compute a single feature.
#
# Subclasses overwrite feature_name() plus compute_feature() and/or
# compute_feature_on_view(); the multi-feature interface of the parent
# class is derived from those.
class AbstractSingleFeatureExtractor < AbstractFeatureExtractor
  ###
  # returns a string: the designator for this feature extractor.
  #
  # here: single feature, and the feature name is the designator.
  #
  # Defined on this class (not on AbstractFeatureExtractor), so that the
  # parent's own designator() is left untouched for extractors that are
  # not single-feature extractors.
  def self.designator
    feature_name
  end

  ###
  # returns an array holding the one feature name of this extractor
  def self.feature_names
    [feature_name]
  end

  ###
  # returns a one-element array holding the result of compute_feature()
  def compute_features
    [compute_feature]
  end

  ###
  # returns a one-element array holding the column computed by
  # compute_feature_on_view() for the given view (a DBView object)
  def compute_features_on_view(view)
    [compute_feature_on_view(view)]
  end

  ######
  # Single-feature methods

  ###
  # returns a string: the name of the single feature; to be overwritten
  def self.feature_name
    raise "Overwrite me."
  end

  ###
  # computes the single feature value; to be overwritten
  def compute_feature
    raise "Overwrite me"
  end

  ###
  # computes the single feature column for a view (a DBView object);
  # to be overwritten
  def compute_feature_on_view(view)
    raise "Overwrite me"
  end
end
|
211
|
-
|
212
|
-
######################################################
|
213
|
-
|
214
|
-
# Base class for feature extractors that rely on an external knowledge
# source described in a separate experiment file.
class ExternalFeatureExtractor < AbstractFeatureExtractor
  # set to true once the "cannot compute" warning has been printed
  @@warning_uttered = false

  ####
  # initialization:
  #
  # read the experiment file for external interfaces, whose name is taken
  # from the 'external_descr_file' entry of the Rosy experiment file.
  # If that entry is not set, @exp_external is nil and a warning is
  # printed (only the first time this happens).
  #
  # @param exp RosyConfigData object
  # @param interpreter_class SynInterpreter class
  def initialize(exp, interpreter_class)
    @exp_rosy = exp
    @@interpreter_class = interpreter_class

    if @exp_rosy.get("external_descr_file")
      @exp_external = ExternalConfigData.new(@exp_rosy.get("external_descr_file"))
    else
      unless @@warning_uttered
        $stderr.puts "Warning: Cannot compute external feature",
                     "since 'external_descr_file' has not been set",
                     "in the Rosy experiment file."
        @@warning_uttered = true
      end

      @exp_external = nil
    end
  end
end
|
@@ -1,58 +0,0 @@
|
|
1
|
-
# ExternalConfigData
|
2
|
-
# Katrin Erk January 2006
|
3
|
-
#
|
4
|
-
# All scripts that compute additional external knowledge sources
|
5
|
-
# for Fred and Rosy:
|
6
|
-
# access to configuration and experiment description file
|
7
|
-
|
8
|
-
require 'common/config_data'
|
9
|
-
|
10
|
-
##############################
|
11
|
-
# Class ExternalConfigData
|
12
|
-
#
|
13
|
-
# inherits from ConfigData,
|
14
|
-
# sets variable names appropriate to tasks of external knowledge sources
|
15
|
-
|
16
|
-
# Configuration data for scripts that compute additional external
# knowledge sources: a ConfigData with the variable names appropriate to
# that task.
class ExternalConfigData < ConfigData
  # @param filename name of the config file to read
  def initialize(filename)
    # known features and their value types
    feature_types = {
      "directory" => "string",

      "experiment_id" => "string",

      "gfmap_restrict_to_downpath" => "bool",
      "gfmap_restrict_pathlen" => "integer",
      "gfmap_remove_gf" => "list"
    }

    # initialize config data object: config file, features, (no) variables
    super(filename, feature_types, [])

    # set access functions for list features
    set_list_feature_access("gfmap_remove_gf", method("access_as_stringlist"))
  end

  ###
  protected

  #####
  # access_as_stringlist
  #
  # assumed format in the config file:
  #
  #   lhs = rhs1 rhs2 ... rhsN
  #
  # given in val_list as string tuples [rhs1,...,rhsN].
  #
  # Joins the strings of each tuple by single spaces and returns the
  # resulting strings ("rhs1 rhs2 ... rhsN") as an array, one per tuple.
  def access_as_stringlist(val_list) # array:array:string
    val_list.map { |rhs_parts| rhs_parts.join(" ") }
  end
end
|
56
|
-
|
57
|
-
|
58
|
-
|
data/lib/rosy/FailedParses.rb
DELETED
@@ -1,130 +0,0 @@
|
|
1
|
-
# Failed Parses
|
2
|
-
#
|
3
|
-
# SP May 05
|
4
|
-
#
|
5
|
-
# Administration of information about failed parses;
|
6
|
-
# - sentence ID
|
7
|
-
# - frame
|
8
|
-
# - missed FE markables
|
9
|
-
#
|
10
|
-
# this class is pretty much a glorified hash table with methods to
|
11
|
-
# - read FailedParses from a file and to write them to a file
|
12
|
-
# - access info in a frame-specific way
|
13
|
-
|
14
|
-
# Administration of information about failed parses: for each failed
# sentence its ID, frame, target, target POS and the FE labels that were
# missed. Entries are kept in registration order.
class FailedParses
  ###
  # initialize
  #
  # starts with an empty list of failed parses
  def initialize
    @failed_parses = []
  end

  ###
  # register
  #
  # register a new failed parse. Registering the same sentence id more than
  # once is allowed; every registration is recorded.
  #
  # @param sent_id    sentence id (any object)
  # @param frame      string: frame name
  # @param target     target of the frame
  # @param target_pos string: target POS
  # @param fe_list    array of strings: FE labels
  def register(sent_id, frame, target, target_pos, fe_list)
    @failed_parses << [sent_id, frame, target, target_pos, fe_list]
  end

  ###
  # make_split
  #
  # produce a random "split" of the failed parses into a train and a test
  # section. Each entry goes to the train section with a probability of
  # train_percentage percent, so 0 puts everything into the test section
  # and 100 puts everything into the train section.
  #
  # @param train_percentage Integer between 0 and 100
  # @return Array with two FailedParses objects, the first for the
  #   train data, the second for the test data
  # @raise RuntimeError if train_percentage is not an Integer in 0..100
  def make_split(train_percentage)
    unless train_percentage.is_a?(Integer) && train_percentage >= 0 && train_percentage <= 100
      raise "Need Integer between 0 and 100 as training percentage."
    end

    train_failed = FailedParses.new
    test_failed = FailedParses.new
    @failed_parses.each do |sent_id, frame, target, target_pos, fe_list|
      # rand(100) is uniform over 0..99, so exactly train_percentage of the
      # 100 possible outcomes select the train section
      section = rand(100) < train_percentage ? train_failed : test_failed
      section.register(sent_id, frame, target, target_pos, fe_list)
    end
    [train_failed, test_failed]
  end

  ###
  # Access information
  #
  # failed_sent: number of failed sentences
  #
  # optional parameters: frame, target, target_pos: if not specified or nil,
  # marginal frequencies are counted (sum over all values)
  def failed_sent(frame_spec = nil, target_spec = nil, target_pos_spec = nil)
    matching_parses(frame_spec, target_spec, target_pos_spec).length
  end

  ###
  # failed_fes: Hash that maps FE names [String] onto numbers of failed
  # FEs [Int]; FE names that never failed map to 0.
  #
  # optional parameters: as for failed_sent
  def failed_fes(frame_spec = nil, target_spec = nil, target_pos_spec = nil)
    fe_hash = Hash.new(0)
    matching_parses(frame_spec, target_spec, target_pos_spec).each do |entry|
      entry.last.each { |fe_label| fe_hash[fe_label] += 1 }
    end
    fe_hash
  end

  ###
  # Marshalling:
  #
  # save - save info about failed parses to file
  def save(filename)
    # binary mode: Marshal output is not text
    File.open(filename, "wb") { |io_obj| Marshal.dump(@failed_parses, io_obj) }
    nil
  end

  ###
  # load - load info about failed parses from file
  #
  # If the file cannot be read, a warning is printed and the current
  # entries are left as they are.
  #
  # NOTE(review): Marshal.load must only be used on files this program
  # wrote itself; it is not safe on untrusted input.
  def load(filename)
    @failed_parses = File.open(filename, "rb") { |io_obj| Marshal.load(io_obj) }
    nil
  rescue StandardError
    $stderr.puts "WARNING: couldn't read failed parses file #{filename}."
    $stderr.puts "I'll assume that there are no failed parses."
  end

  private

  # Entries that agree with every given (non-nil) specification.
  def matching_parses(frame_spec, target_spec, target_pos_spec)
    @failed_parses.select do |_sent_id, frame, target, target_pos, _fe_list|
      (frame_spec.nil? || frame_spec == frame) &&
        (target_spec.nil? || target_spec == target) &&
        (target_pos_spec.nil? || target_pos_spec == target_pos)
    end
  end
end
|
data/lib/rosy/FeatureInfo.rb
DELETED
@@ -1,242 +0,0 @@
|
|
1
|
-
require 'common/ruby_class_extensions'
|
2
|
-
|
3
|
-
# Registry of all known feature extractor classes, plus (per object) the
# selection of extractors and features that is active for one experiment.
class RosyFeatureInfo
  ###
  # class variable:
  # list of all known extractors
  # add to it using add_feature()
  @@extractors = []

  # boolean. set to true after warning messages have been given once
  @@warned = false

  ###
  # add a feature extractor to the list of known extractors
  #
  # @param class_name Class object of the extractor
  def self.add_feature(class_name)
    @@extractors << class_name
  end

  ###
  # Select the extractors for an experiment:
  # - those chosen in the experiment file (entries of "feature"), and
  # - those needed by the system (feature type "admin" or "gold"),
  # then drop extractors that report themselves as not computable and
  # build the list of feature descriptions.
  #
  # Exits the program (status 1) if a feature is given more than one of the
  # options dontuse, argrec, arglab, onestep.
  #
  # @param exp experiment file object; must respond to get_lf("feature")
  #   with pairs [feature group designator(string), options(array:string)]
  def initialize(exp)
    @current_extractors = []
    @exp = exp

    add_user_extractors
    add_system_extractors
    drop_uncomputable_extractors
    @features = build_feature_table

    # do not print warnings again if another RosyFeatureInfo object is made
    @@warned = true
  end

  ###
  # get_column_formats
  #
  # returns a list of pairs [feature_name(string), sql_column_format(string)]:
  # all features to be computed, with their SQL column formats
  #
  # @param phase string: "phase 1" or "phase 2"; nil selects both phases
  def get_column_formats(phase = nil)
    features_in_phase(phase).map do |feature_descr|
      [feature_descr["feature_name"], feature_descr["sql_type"]]
    end
  end

  ###
  # get_column_names
  #
  # returns a list of feature names (strings):
  # all features to be computed
  #
  # @param phase string: "phase 1" or "phase 2"; nil selects both phases
  def get_column_names(phase = nil)
    features_in_phase(phase).map { |feature_descr| feature_descr["feature_name"] }
  end

  ###
  # get_index_columns
  #
  # returns a list of feature (column) names as Strings
  # consisting of all features whose extractor lists "index" in its info()
  def get_index_columns
    @features.select { |feature_descr| feature_descr["is_index"] }
             .map { |feature_descr| feature_descr["feature_name"] }
  end

  ###
  # get_model_features
  #
  # returns a list of feature (column) names as strings
  # consisting of all the features to be used for the modeling
  #
  # @param step argrec, arglab, onestep
  def get_model_features(step)
    # feature_descr["step"] is argrec, arglab, onestep, dontuse, or nil:
    # nil matches all steps,
    # 'dontuse' matches no step, so these features are never returned here.
    # Admin features and the gold label are never model features.
    @features.select { |feature_descr| feature_descr["step"].nil? || feature_descr["step"] == step }
             .reject { |feature_descr| ["admin", "gold"].include?(feature_descr["type"]) }
             .map { |feature_descr| feature_descr["feature_name"] }
  end

  ###
  # get_extractor_objects
  #
  # returns two lists of feature extractor objects,
  # covering all features of the given phase:
  # the first list contains the extractors whose info() includes "rosy",
  # the second list contains the others.
  #
  # @param phase string: "phase 1" or "phase 2"
  # @param interpreter_class SynInterpreter class
  # @raise RuntimeError for any other phase value
  def get_extractor_objects(phase, interpreter_class)
    unless ["phase 1", "phase 2"].include?(phase)
      # interpolation, so that a nil or non-String phase still yields this
      # error rather than a TypeError from string concatenation
      raise "Shouldn't be here: #{phase}"
    end

    # Array#distribute is not core Ruby; it is expected to be provided by
    # the class extensions this file requires.
    @current_extractors.select { |descr| descr["extractor"].phase == phase }
                       .map { |descr| descr["extractor"].new(@exp, interpreter_class) }
                       .distribute { |extractor_obj| extractor_obj.class.info.include?("rosy") }
  end

  private

  # Print the message produced by the block unless warnings have already
  # been given for an earlier RosyFeatureInfo object.
  def warn_once
    $stderr.puts yield unless @@warned
  end

  # Add the extractors named in the experiment file, with their step option.
  def add_user_extractors
    @exp.get_lf("feature").each do |extractor_name, options|
      extractor = @@extractors.find { |e| e.designator == extractor_name }
      unless extractor
        # no extractor found matching the given designator
        warn_once { "Warning: Could not find a feature extractor for #{extractor_name}: skipping." }
        next
      end

      @current_extractors << {
        "extractor" => extractor,
        "step" => step_from_options(extractor_name, options)
      }
    end
  end

  # Read and check the options of one feature; returns the step option
  # (dontuse, argrec, arglab, onestep) or nil if none was given.
  def step_from_options(extractor_name, options)
    step = nil
    options.each do |option|
      if ["dontuse", "argrec", "arglab", "onestep"].include?(option)
        if step
          # step has already been set
          $stderr.puts "ERROR in feature #{extractor_name}: Please set only one of the options dontuse, argrec, arglab, onestep"
          exit 1
        end
        step = option
      else
        warn_once { "Warning: Unknown option for feature #{extractor_name}: #{option}. Skipping" }
      end
    end
    step
  end

  # Add the admin and gold extractors with step "dontuse". If the user has
  # already chosen one of them, the user's entry is replaced.
  def add_system_extractors
    @@extractors.select { |e| ["admin", "gold"].include?(e.feature_type) }.each do |extractor|
      @current_extractors.delete_if { |descr| descr["extractor"].designator == extractor.designator }
      @current_extractors << {
        "extractor" => extractor,
        "step" => "dontuse"
      }
    end
  end

  # Make sure that all extractors are computable in the current model
  # (i.e. check dependencies); remove those that are not.
  def drop_uncomputable_extractors
    # Designators available per step, determined before anything is removed.
    # Extractors without a step are available in every step.
    available = {}
    [nil, "argrec", "arglab", "onestep"].each do |step|
      available[step] =
        @current_extractors.select { |descr| descr["step"].nil? || descr["step"] == step }
                           .map { |descr| descr["extractor"].designator }
    end

    @current_extractors.delete_if do |descr|
      step = descr["step"]
      computable =
        if step == "dontuse"
          # either an admin feature or a user feature not to be used this time
          true
        elsif available.key?(step)
          descr["extractor"].is_computable(available[step])
        end
      next false if computable # i.e. don't delete

      warn_once { "Warning: Feature extractor #{descr["extractor"].designator()} cannot be computed: skipping." }
      true
    end
  end

  # List of all features as hashes
  #   "feature_name" -> string,
  #   "sql_type" -> string,
  #   "is_index" -> boolean,
  #   "step" -> string: argrec, arglab, onestep, dontuse, or nil
  #   "type" -> string
  #   "phase" -> string: phase 1 or phase 2
  def build_feature_table
    @current_extractors.flat_map do |descr|
      extractor = descr["extractor"]
      extractor.feature_names.map do |feature_name|
        {
          "feature_name" => feature_name,
          "sql_type" => extractor.sql_type,
          "is_index" => extractor.info.include?("index"),
          "step" => descr["step"],
          "type" => extractor.feature_type,
          "phase" => extractor.phase
        }
      end
    end
  end

  # Feature descriptions of the given phase; all of them if phase is nil.
  def features_in_phase(phase)
    @features.select { |feature_descr| phase.nil? || feature_descr["phase"] == phase }
  end
end
|