frprep 0.0.1.prealpha
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.yardopts +8 -0
- data/CHANGELOG.rdoc +0 -0
- data/LICENSE.rdoc +0 -0
- data/README.rdoc +0 -0
- data/lib/common/AbstractSynInterface.rb +1227 -0
- data/lib/common/BerkeleyInterface.rb +375 -0
- data/lib/common/CollinsInterface.rb +1165 -0
- data/lib/common/ConfigData.rb +694 -0
- data/lib/common/Counter.rb +18 -0
- data/lib/common/DBInterface.rb +48 -0
- data/lib/common/EnduserMode.rb +27 -0
- data/lib/common/Eval.rb +480 -0
- data/lib/common/FixSynSemMapping.rb +196 -0
- data/lib/common/FrPrepConfigData.rb +66 -0
- data/lib/common/FrprepHelper.rb +1324 -0
- data/lib/common/Graph.rb +345 -0
- data/lib/common/ISO-8859-1.rb +24 -0
- data/lib/common/ML.rb +186 -0
- data/lib/common/Maxent.rb +215 -0
- data/lib/common/MiniparInterface.rb +1388 -0
- data/lib/common/Optimise.rb +195 -0
- data/lib/common/Parser.rb +213 -0
- data/lib/common/RegXML.rb +269 -0
- data/lib/common/RosyConventions.rb +171 -0
- data/lib/common/SQLQuery.rb +243 -0
- data/lib/common/STXmlTerminalOrder.rb +194 -0
- data/lib/common/SalsaTigerRegXML.rb +2347 -0
- data/lib/common/SalsaTigerXMLHelper.rb +99 -0
- data/lib/common/SleepyInterface.rb +384 -0
- data/lib/common/SynInterfaces.rb +275 -0
- data/lib/common/TabFormat.rb +720 -0
- data/lib/common/Tiger.rb +1448 -0
- data/lib/common/TntInterface.rb +44 -0
- data/lib/common/Tree.rb +61 -0
- data/lib/common/TreetaggerInterface.rb +303 -0
- data/lib/common/headz.rb +338 -0
- data/lib/common/option_parser.rb +13 -0
- data/lib/common/ruby_class_extensions.rb +310 -0
- data/lib/fred/Baseline.rb +150 -0
- data/lib/fred/FileZipped.rb +31 -0
- data/lib/fred/FredBOWContext.rb +863 -0
- data/lib/fred/FredConfigData.rb +182 -0
- data/lib/fred/FredConventions.rb +232 -0
- data/lib/fred/FredDetermineTargets.rb +324 -0
- data/lib/fred/FredEval.rb +312 -0
- data/lib/fred/FredFeatureExtractors.rb +321 -0
- data/lib/fred/FredFeatures.rb +1061 -0
- data/lib/fred/FredFeaturize.rb +596 -0
- data/lib/fred/FredNumTrainingSenses.rb +27 -0
- data/lib/fred/FredParameters.rb +402 -0
- data/lib/fred/FredSplit.rb +84 -0
- data/lib/fred/FredSplitPkg.rb +180 -0
- data/lib/fred/FredTest.rb +607 -0
- data/lib/fred/FredTrain.rb +144 -0
- data/lib/fred/PlotAndREval.rb +480 -0
- data/lib/fred/fred.rb +45 -0
- data/lib/fred/md5.rb +23 -0
- data/lib/fred/opt_parser.rb +250 -0
- data/lib/frprep/AbstractSynInterface.rb +1227 -0
- data/lib/frprep/Ampersand.rb +37 -0
- data/lib/frprep/BerkeleyInterface.rb +375 -0
- data/lib/frprep/CollinsInterface.rb +1165 -0
- data/lib/frprep/ConfigData.rb +694 -0
- data/lib/frprep/Counter.rb +18 -0
- data/lib/frprep/FNCorpusXML.rb +643 -0
- data/lib/frprep/FNDatabase.rb +144 -0
- data/lib/frprep/FixSynSemMapping.rb +196 -0
- data/lib/frprep/FrPrepConfigData.rb +66 -0
- data/lib/frprep/FrameXML.rb +513 -0
- data/lib/frprep/FrprepHelper.rb +1324 -0
- data/lib/frprep/Graph.rb +345 -0
- data/lib/frprep/ISO-8859-1.rb +24 -0
- data/lib/frprep/MiniparInterface.rb +1388 -0
- data/lib/frprep/Parser.rb +213 -0
- data/lib/frprep/RegXML.rb +269 -0
- data/lib/frprep/STXmlTerminalOrder.rb +194 -0
- data/lib/frprep/SalsaTigerRegXML.rb +2347 -0
- data/lib/frprep/SalsaTigerXMLHelper.rb +99 -0
- data/lib/frprep/SleepyInterface.rb +384 -0
- data/lib/frprep/SynInterfaces.rb +275 -0
- data/lib/frprep/TabFormat.rb +720 -0
- data/lib/frprep/Tiger.rb +1448 -0
- data/lib/frprep/TntInterface.rb +44 -0
- data/lib/frprep/Tree.rb +61 -0
- data/lib/frprep/TreetaggerInterface.rb +303 -0
- data/lib/frprep/do_parses.rb +142 -0
- data/lib/frprep/frprep.rb +686 -0
- data/lib/frprep/headz.rb +338 -0
- data/lib/frprep/one_parsed_file.rb +28 -0
- data/lib/frprep/opt_parser.rb +94 -0
- data/lib/frprep/ruby_class_extensions.rb +310 -0
- data/lib/rosy/AbstractFeatureAndExternal.rb +240 -0
- data/lib/rosy/DBMySQL.rb +146 -0
- data/lib/rosy/DBSQLite.rb +280 -0
- data/lib/rosy/DBTable.rb +239 -0
- data/lib/rosy/DBWrapper.rb +176 -0
- data/lib/rosy/ExternalConfigData.rb +58 -0
- data/lib/rosy/FailedParses.rb +130 -0
- data/lib/rosy/FeatureInfo.rb +242 -0
- data/lib/rosy/GfInduce.rb +1115 -0
- data/lib/rosy/GfInduceFeature.rb +148 -0
- data/lib/rosy/InputData.rb +294 -0
- data/lib/rosy/RosyConfigData.rb +115 -0
- data/lib/rosy/RosyConfusability.rb +338 -0
- data/lib/rosy/RosyEval.rb +465 -0
- data/lib/rosy/RosyFeatureExtractors.rb +1609 -0
- data/lib/rosy/RosyFeaturize.rb +280 -0
- data/lib/rosy/RosyInspect.rb +336 -0
- data/lib/rosy/RosyIterator.rb +477 -0
- data/lib/rosy/RosyPhase2FeatureExtractors.rb +230 -0
- data/lib/rosy/RosyPruning.rb +165 -0
- data/lib/rosy/RosyServices.rb +744 -0
- data/lib/rosy/RosySplit.rb +232 -0
- data/lib/rosy/RosyTask.rb +19 -0
- data/lib/rosy/RosyTest.rb +826 -0
- data/lib/rosy/RosyTrain.rb +232 -0
- data/lib/rosy/RosyTrainingTestTable.rb +786 -0
- data/lib/rosy/TargetsMostFrequentFrame.rb +60 -0
- data/lib/rosy/View.rb +418 -0
- data/lib/rosy/opt_parser.rb +379 -0
- data/lib/rosy/rosy.rb +77 -0
- data/lib/shalmaneser/version.rb +3 -0
- data/test/frprep/test_opt_parser.rb +94 -0
- data/test/functional/functional_test_helper.rb +40 -0
- data/test/functional/sample_experiment_files/fred_test.salsa.erb +122 -0
- data/test/functional/sample_experiment_files/fred_train.salsa.erb +135 -0
- data/test/functional/sample_experiment_files/prp_test.salsa.erb +138 -0
- data/test/functional/sample_experiment_files/prp_test.salsa.fred.standalone.erb +120 -0
- data/test/functional/sample_experiment_files/prp_test.salsa.rosy.standalone.erb +120 -0
- data/test/functional/sample_experiment_files/prp_train.salsa.erb +138 -0
- data/test/functional/sample_experiment_files/prp_train.salsa.fred.standalone.erb +138 -0
- data/test/functional/sample_experiment_files/prp_train.salsa.rosy.standalone.erb +138 -0
- data/test/functional/sample_experiment_files/rosy_test.salsa.erb +257 -0
- data/test/functional/sample_experiment_files/rosy_train.salsa.erb +259 -0
- data/test/functional/test_fred.rb +47 -0
- data/test/functional/test_frprep.rb +52 -0
- data/test/functional/test_rosy.rb +20 -0
- metadata +270 -0
@@ -0,0 +1,310 @@
|
|
1
|
+
# Katrin Erk Oct 05
|
2
|
+
#
|
3
|
+
# useful extensions to standard classes
|
4
|
+
|
5
|
+
require 'fileutils'
|
6
|
+
|
7
|
+
class String
  ###
  # Does this string begin with other_string?
  #
  # other_string: String, the prefix to look for
  #
  # returns: true or false. The empty string is a prefix of every string.
  def startswith(other_string)
    # Slice by length rather than by end index: with an empty prefix the
    # range 0..-1 would select the whole string instead of nothing.
    self[0, other_string.length] == other_string
  end

  ###
  # Does this string end with other_string?
  #
  # other_string: String, the suffix to look for
  #
  # returns: true or false. The empty string is a suffix of every string.
  def endswith(other_string)
    # a suffix longer than the string itself can never match
    return false if other_string.length > length

    self[length - other_string.length..-1] == other_string
  end
end
|
17
|
+
|
18
|
+
class File
  ########
  # check whether a given directory exists,
  # and if it doesn't, make sure it is created (including missing parents).
  #
  # piece together the strings in 'pieces' to make the path,
  # appending "/" to all strings if necessary
  #
  # Exits the program (via File.existing_dir) if the directory is still
  # missing or not accessible afterwards.
  #
  # returns: the path pieced together (expanded, ending in "/")
  def File.new_dir(*pieces) # strings, to be pieced together
    dir_path = File.make_path(pieces, true).first
    # File.exist? instead of File.exists?: the latter was removed in Ruby 3.2
    FileUtils.mkdir_p(dir_path) unless File.exist?(dir_path)
    # check that all went well in creating the directory
    File.existing_dir(dir_path)

    return dir_path
  end

  ########
  # same as new_dir, but the last piece is a filename:
  # only the directory part is created, the file itself is not touched.
  #
  # returns: the whole path (directory part plus filename)
  def File.new_filename(*pieces)
    dir_path, whole_path = File.make_path(pieces, false)
    FileUtils.mkdir_p(dir_path) unless File.exist?(dir_path)
    # check that all went well in creating the directory
    File.existing_dir(dir_path)

    return whole_path
  end

  #####
  # check whether a given directory exists,
  # and report failure if it does not exist.
  #
  # piece together the strings in 'pieces' to make the path,
  # appending "/" to all strings if necessary
  #
  # Writes an error message to $stderr and exits with status 1 if the
  # path is not a directory or cannot be entered.
  #
  # returns: the path pieced together (expanded, ending in "/")
  def File.existing_dir(*pieces) # strings
    dir_path = File.make_path(pieces, true).first

    # File.directory? is false for nonexistent paths, so it covers both checks
    unless File.directory?(dir_path)
      $stderr.puts "Error: Directory #{dir_path} doesn't exist. Exiting."
      exit(1)
    end
    # for a directory, "executable" means it can be entered
    unless File.executable?(dir_path)
      $stderr.puts "Error: Cannot access directory #{dir_path}. Exiting."
      exit(1)
    end

    return dir_path
  end

  ####
  # like existing_dir, but the last bit is a filename:
  # only the directory part is checked, not the file itself.
  #
  # returns: the whole path (directory part plus filename)
  def File.existing_filename(*pieces) # strings
    dir_path, whole_path = File.make_path(pieces, false)

    unless File.directory?(dir_path)
      $stderr.puts "Error: Directory #{dir_path} doesn't exist. Exiting"
      exit(1)
    end
    unless File.executable?(dir_path)
      $stderr.puts "Error: Cannot access directory #{dir_path}. Exiting."
      exit(1)
    end

    return whole_path
  end

  ####
  # piece together the strings in 'pieces' to make a path,
  # appending "/" to all but the last string if necessary
  #
  # if 'pieces' is already a string, take that as a one-piece path
  #
  # if is_dir is true, also append "/" to the last piece of the string
  #
  # the resulting path is expanded: For example, initial
  # ~ is expanded to the setting of $HOME
  #
  # nil entries among the directory pieces are reported on $stderr
  # and skipped.
  #
  # returns: pair of strings (directory_part, whole_path)
  #
  def File.make_path(pieces,         # string or array:string
                     is_dir = false) # Boolean: is the path a directory?
    pieces = [pieces] if pieces.kind_of? String

    # iterate over all but the filename
    last_dir_index = is_dir ? -1 : -2

    dir = ""
    pieces[0..last_dir_index].each { |piece|
      if piece.nil?
        # whoops, nil entry in name of path!
        $stderr.puts "File.make_path ERROR: nil for piece of path name."
        next
      end
      dir << piece
      dir << "/" unless piece =~ /\/$/
    }
    dir = File.expand_path(dir)
    # expand_path removes the final "/" again
    dir = dir + "/" unless dir =~ /\/$/

    if is_dir
      return [dir, dir]
    else
      return [dir, dir + pieces[-1]]
    end
  end
end
|
148
|
+
|
149
|
+
#############################################
|
150
|
+
class Array

  ###
  # interleave N arrays:
  # given arrays [a1... an], [b1,...,bn], ..[z1, ...,zn]
  # return [[a1,b1, .., z1]...,[an,bn, .., zn]]
  #
  # if one array is longer than the other,
  # e.g. [a1...an], [b1,...,bm] with n> m
  # the result is
  # [[a1,b1],...[am, bm], [am+1, nil], ..., [an, nil]]
  # and analogously for m>n
  #
  # called without arguments, each element is wrapped in a
  # one-element array.
  def interleave(*arrays)
    # collect all lengths in one list so that an empty 'arrays'
    # does not put a nil into the max() computation
    len = ([length()] + arrays.map { |a| a.length() }).max()
    (0...len).map { |ix|
      [at(ix)] + arrays.map { |a| a[ix] }
    }
  end

  ###
  # prepend: prepend element to array
  # because I can never remember which is 'shift'
  # and which is 'unshift'
  #
  # Only defined when Ruby does not bring its own Array#prepend
  # (which also accepts several elements), so the built-in is not narrowed.
  unless method_defined?(:prepend)
    def prepend(element)
      unshift(element)
    end
  end

  ###
  # count the number of occurrences of element in this array
  #
  # Only defined when Ruby does not bring its own Array#count:
  # the built-in does the same for one argument and additionally
  # supports the no-argument and block forms, which a one-argument
  # redefinition would break for every other user of Array.
  unless method_defined?(:count)
    def count(element)
      num = 0
      each { |my_element|
        num += 1 if my_element == element
      }
      return num
    end
  end

  ###
  # count the number of occurrences of
  # elements from list in this array
  #
  # list: any object responding to include?
  #
  # returns: integer
  def counts(list)
    select { |my_element| list.include? my_element }.length()
  end

  ###
  # draw a random sample of size N
  # from this array
  #
  # size: integer, or nil/omitted to draw a single element
  #       (as the built-in Array#sample does)
  #
  # returns:
  # - without size: one random element, nil for an empty array
  # - nil if size is negative
  # - [] if size is 0
  # - an (unshuffled) copy of this array if size >= length
  # - otherwise an array of 'size' elements drawn without replacement
  def sample(size = nil)
    if size.nil?
      return empty? ? nil : at(rand(length()))
    elsif size < 0
      return nil
    elsif size == 0
      return []
    elsif size >= length()
      return self.clone()
    end

    # one random key per position (not per value), so that
    # duplicate elements are ranked independently of each other
    return sort_by { rand() }.first(size)
  end
end
|
221
|
+
|
222
|
+
class Float
  ###
  # round a float to the given number of decimal points
  #
  # n: integer, number of decimal places to keep
  #
  # returns: the rounded float. NaN and +/-Infinity have no decimal
  # representation to round and are returned unchanged.
  def round_to_decpts(n)
    # Float#round raises FloatDomainError for NaN and Infinity
    return self if nan? || infinite?

    scale = 10**n
    return (self * scale).round.to_f / scale
  end
end
|
233
|
+
|
234
|
+
################
|
235
|
+
module EnumerableBool
  # Mixin for any class that provides 'each'.
  # All three methods fall back to the identity block when called
  # without a block, i.e. they then work on the elements themselves.

  ###
  # And_{x \in X} block(x)
  #
  # returns: false as soon as the block yields false/nil for an element,
  # true otherwise (in particular for an empty collection)
  def big_and(&block)
    block ||= Proc.new { |x| x }
    each { |x|
      return false unless block.call(x)
    }
    return true
  end

  ###
  # Or_{x \in X} block(x)
  #
  # returns: true as soon as the block yields a true value for an element,
  # false otherwise (in particular for an empty collection)
  def big_or(&block)
    block ||= Proc.new { |x| x }
    each { |x|
      return true if block.call(x)
    }
    return false
  end

  ###
  # Sum_{x \in X} block(x)
  #
  # init: start value of the sum; anything that responds to '+'
  #
  # returns: init plus the block values of all elements
  def big_sum(init = 0, &block)
    block ||= Proc.new { |x| x }
    sum = init
    each { |x|
      sum += block.call(x)
    }
    return sum
  end
end
|
271
|
+
|
272
|
+
################
|
273
|
+
# Given an enumerable, distribute its items into two bins (arrays)
|
274
|
+
# depending on whether the block returns true
|
275
|
+
module EnumerableDistribute
  ###
  # Sort the items of this collection into two arrays.
  # Relies only on 'each' of the including class.
  #
  # returns: pair [accepted, rejected] where 'accepted' holds the items
  # for which the block returned a true value and 'rejected' all others,
  # both in iteration order
  def distribute(&block)
    accepted = []
    rejected = []
    each do |item|
      bin = block.call(item) ? accepted : rejected
      bin.push(item)
    end
    [accepted, rejected]
  end
end
|
289
|
+
|
290
|
+
#####################
|
291
|
+
# map with index
|
292
|
+
module MapWithIndex
  ###
  # Like map, but the block receives the item and its position.
  # Relies on 'each_with_index' of the including class.
  #
  # returns: array of the block's results, in iteration order
  def map_with_index(&block)
    results = []
    each_with_index do |item, position|
      results.push(block.call(item, position))
    end
    results
  end
end
|
303
|
+
|
304
|
+
# include new Mixins into array already.
|
305
|
+
# for other classes, do this when requiring StandardPkgExtensions
|
306
|
+
# Mix the helper modules defined in this file into Array, one after
# the other in this order. 'send' is used because Module#include is
# private on older Ruby versions.
[EnumerableBool, EnumerableDistribute, MapWithIndex].each do |mixin|
  Array.send(:include, mixin)
end
|
@@ -0,0 +1,240 @@
|
|
1
|
+
# Katrin Erk November 05
|
2
|
+
#
|
3
|
+
# Abstract classes for
|
4
|
+
# - Rosy features
|
5
|
+
# - Rosy interface for external knowledge sources.
|
6
|
+
|
7
|
+
require 'rosy/ExternalConfigData'
|
8
|
+
|
9
|
+
####
|
10
|
+
# Feature Extractor:
|
11
|
+
# computes one or more features for a node (a SynNode object) out of
|
12
|
+
# a SalsaTigerSentence
|
13
|
+
class AbstractFeatureExtractor
  # State of the current instance. These are class variables and thus
  # shared by this class and all of its subclasses.
  @@sent = nil               # SalsaTigerSentence: sentence of the current instance
  @@frame = nil              # FrameNode: frame of the current instance
  @@node = nil               # SynNode: constituent that is the current instance
  @@interpreter_class = nil  # SynInterpreter class
  @@instance_ok = true

  ###
  # returns a string: the designator for this feature extractor
  # (an extractor may compute several features, but
  # in the experiment file it is chosen by a single designator)
  #
  # abstract: raises RuntimeError unless overwritten
  def self.designator
    raise "Overwrite me"
  end

  ###
  # returns an array of feature names, the names of the
  # features that it can compute.
  # The number of features that the extractor computes must be fixed.
  #
  # abstract: raises RuntimeError unless overwritten
  def self.feature_names
    raise "Overwrite me."
  end

  ###
  # returns a string: the data type for the feature
  # to be passed on to the MySQL database,
  # e.g. VARCHAR(10), INT
  #
  # abstract: raises RuntimeError unless overwritten
  def self.sql_type
    raise "Overwrite me"
  end

  ###
  # returns a string: the feature type
  # (the same for all features computed by this extractor)
  # possible values:
  # - gold: gold label
  # - admin: administrative feature, do not pass this on to the learner
  # - syn: feature computed from syntactic characteristics of the instance
  # - sem: feature involving semantic characteristics of the instance
  # - sentlevel: this feature is the same for all instances of a sentence
  #
  # abstract: raises RuntimeError unless overwritten
  def self.feature_type
    raise "Overwrite me"
  end

  ###
  # returns a string: "phase 1" or "phase 2",
  # depending on whether the feature is computed
  # directly from the SalsaTigerSentence and the SynNode objects
  # or whether it is computed from the phase 1 features
  #
  # abstract: raises RuntimeError unless overwritten
  def self.phase
    raise "Overwrite me."
  end

  ###
  # returns an array of strings, providing information about
  # the feature extractor (empty by default)
  def self.info
    return []
  end

  ###
  # set sentence, set node, set other settings:
  # this is done prior to
  # feature computation using compute_feature()
  # such that computations that stay the same for
  # several features can be done in advance
  #
  # This is just relevant for Phase 1
  #
  # returns: false/nil if there was a problem
  def self.set_sentence(sent,  # SalsaTigerSentence object
                        frame) # FrameNode object
    @@sent = sent
    @@frame = frame

    return true
  end

  ###
  # remember the node that is the current instance
  #
  # returns: true
  def self.set_node(node) # SynNode of the sentence set in set_sentence
    @@node = node

    return true
  end

  ###
  # set sentence, set node, set general settings: this is done prior to
  # feature computation using compute_feature_value()
  # such that computations that stay the same for
  # several features can be done in advance
  #
  # returns: true
  def self.set(var_hash = {})
    # no settings at this point

    return true
  end

  ###
  # test during initialisation whether a feature is computable
  # gives the feature the possibility to specify additional constraints
  # e.g. for phase2 features : specify which extractors from phase 1 are presupposed
  #
  # returns: true by default
  def self.is_computable(extractor_list) # bool
    return true
  end

  ###
  # Note that interpreter_class is stored in a class variable, so the
  # most recently created extractor determines it for all extractors.
  def initialize(exp,               # ConfigData object: experiment file information
                 interpreter_class) # SynInterpreter class
    @exp = exp
    @@interpreter_class = interpreter_class
  end

  ###
  # compute: compute features
  #
  # returns an array of features (strings), length the same as the
  # length of feature_names()
  #
  # abstract: raises RuntimeError unless overwritten
  def compute_features
    raise "overwrite me"
  end

  ###
  # phase 2 extractors:
  # compute features for a complete view
  #
  # returns: an array of columns,
  # where a column is an array of feature values.
  # returns one column per entry in feature_names()
  #
  # abstract: raises RuntimeError unless overwritten
  def compute_features_on_view(view) # DBView object
    raise "overwrite me"
  end

  # At this place, we had abstract methods for "training" phase 2 features
  # Since this involves introducing a "state" that is nontrivial to preserve
  # for a standalone version of the classifiers, without keeping the training data,
  # we decided to remove this functionality (30.11.05).
  # Features which rely on learning patterns from the training data and applying them
  # to the test data will from now on be implemented as externals.

  ######
  # NOTE: 'protected' only affects instance methods defined below it.
  # announce_me is a class method and therefore stays publicly callable.
  protected

  ###
  # register this extractor class with RosyFeatureInfo,
  # if that collector class has been loaded; otherwise do nothing.
  def self.announce_me
    # const_defined? works for both string- and symbol-valued constant
    # lists; Module.constants holds symbols in Ruby >= 1.9, so a
    # comparison against the string "RosyFeatureInfo" would never match.
    if Object.const_defined?(:RosyFeatureInfo)
      # yup, we have a class to which we can announce ourselves
      RosyFeatureInfo.add_feature(self)
    else
      # no interface collector class
      # $stderr.puts "Feature #{self.name()} not announced: no RosyFeatureInfo."
    end
  end
end
|
161
|
+
|
162
|
+
################################################################
|
163
|
+
# Wrapper class for extractors that compute a single feature
|
164
|
+
class AbstractSingleFeatureExtractor < AbstractFeatureExtractor

  ###
  # returns a string: the designator for this feature extractor
  # (an extractor may compute several features, but
  # in the experiment file it is chosen by a single designator)
  #
  # here: single feature, and the feature name is the designator
  #
  # Defined on this class (not on AbstractFeatureExtractor), so that the
  # abstract designator() of the base class is left intact for
  # extractors that are not single-feature extractors.
  def self.designator
    return feature_name()
  end

  ###
  # returns: array with one entry, the feature name
  def self.feature_names
    return [feature_name()]
  end

  ###
  # returns: array with one entry, the value of compute_feature()
  def compute_features
    return [compute_feature()]
  end

  ###
  # returns: array with one entry, the column computed by
  # compute_feature_on_view()
  def compute_features_on_view(view) # DBView object
    return [compute_feature_on_view(view)]
  end


  ######
  # Single-feature methods

  ###
  # returns a string: the name of the single feature
  #
  # abstract: raises RuntimeError unless overwritten
  def self.feature_name
    raise "Overwrite me."
  end

  ###
  # compute the single feature for the current instance
  #
  # abstract: raises RuntimeError unless overwritten
  def compute_feature
    raise "Overwrite me"
  end

  ###
  # compute the single feature for a complete view
  #
  # abstract: raises RuntimeError unless overwritten
  def compute_feature_on_view(view) # DBView object
    raise "Overwrite me"
  end
end
|
209
|
+
|
210
|
+
######################################################
|
211
|
+
|
212
|
+
class ExternalFeatureExtractor < AbstractFeatureExtractor

  # set once the missing-description-file warning has been printed,
  # so that it is printed at most once
  @@warning_uttered = false

  ####
  # initialization:
  #
  # read experiment file for external interfaces.
  # If the Rosy experiment file does not set 'external_descr_file',
  # @exp_external is nil and a warning goes to $stderr.
  def initialize(exp,               # RosyConfigData object
                 interpreter_class) # SynInterpreter class
    @exp_rosy = exp
    @@interpreter_class = interpreter_class

    if @exp_rosy.get("external_descr_file")
      @exp_external = ExternalConfigData.new(@exp_rosy.get("external_descr_file"))
    else
      if !@@warning_uttered
        [
          "Warning: Cannot compute external feature",
          "since 'external_descr_file' has not been set",
          "in the Rosy experiment file."
        ].each { |line| $stderr.puts line }
        @@warning_uttered = true
      end

      @exp_external = nil
    end
  end
end
|