shalmaneser-fred 1.2.0.rc4 → 1.2.rc5
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +47 -18
- data/bin/fred +8 -3
- data/lib/fred/FredConventions.rb +190 -189
- data/lib/fred/abstract_context_provider.rb +246 -0
- data/lib/fred/abstract_fred_feature_access.rb +43 -0
- data/lib/fred/answer_key_access.rb +130 -0
- data/lib/fred/aux_keep_writers.rb +94 -0
- data/lib/fred/baseline.rb +153 -0
- data/lib/fred/context_provider.rb +55 -0
- data/lib/fred/feature_extractors/fred_context_feature_extractor.rb +48 -0
- data/lib/fred/feature_extractors/fred_context_pos_feature_extractor.rb +48 -0
- data/lib/fred/feature_extractors/fred_feature_extractor.rb +50 -0
- data/lib/fred/feature_extractors/fred_ngram_feature_extractor.rb +65 -0
- data/lib/fred/feature_extractors/fred_syn_feature_extractor.rb +33 -0
- data/lib/fred/feature_extractors/fred_synsem_feature_extractor.rb +32 -0
- data/lib/fred/feature_extractors.rb +5 -0
- data/lib/fred/file_zipped.rb +43 -0
- data/lib/fred/find_all_targets.rb +94 -0
- data/lib/fred/find_targets_from_frames.rb +92 -0
- data/lib/fred/fred.rb +43 -40
- data/lib/fred/fred_error.rb +15 -0
- data/lib/fred/fred_eval.rb +311 -0
- data/lib/fred/fred_feature_access.rb +420 -0
- data/lib/fred/fred_feature_info.rb +56 -0
- data/lib/fred/fred_featurize.rb +525 -0
- data/lib/fred/fred_parameters.rb +190 -0
- data/lib/fred/fred_split.rb +86 -0
- data/lib/fred/fred_split_pkg.rb +189 -0
- data/lib/fred/fred_test.rb +571 -0
- data/lib/fred/fred_train.rb +125 -0
- data/lib/fred/grammatical_function_access.rb +63 -0
- data/lib/fred/md5.rb +6 -0
- data/lib/fred/meta_feature_access.rb +185 -0
- data/lib/fred/non_contiguous_context_provider.rb +532 -0
- data/lib/fred/opt_parser.rb +182 -161
- data/lib/fred/plot_and_r_eval.rb +486 -0
- data/lib/fred/single_sent_context_provider.rb +76 -0
- data/lib/fred/slide_var.rb +148 -0
- data/lib/fred/targets.rb +136 -0
- data/lib/fred/toggle_var.rb +61 -0
- data/lib/fred/word_lemma_pos_ne.rb +51 -0
- data/lib/fred/write_features_binary.rb +95 -0
- data/lib/fred/write_features_nary.rb +51 -0
- data/lib/fred/write_features_nary_or_binary.rb +51 -0
- data/lib/shalmaneser/fred.rb +1 -0
- metadata +57 -30
- data/lib/fred/Baseline.rb +0 -150
- data/lib/fred/FileZipped.rb +0 -31
- data/lib/fred/FredBOWContext.rb +0 -877
- data/lib/fred/FredDetermineTargets.rb +0 -319
- data/lib/fred/FredEval.rb +0 -312
- data/lib/fred/FredFeatureExtractors.rb +0 -322
- data/lib/fred/FredFeatures.rb +0 -1061
- data/lib/fred/FredFeaturize.rb +0 -602
- data/lib/fred/FredNumTrainingSenses.rb +0 -27
- data/lib/fred/FredParameters.rb +0 -402
- data/lib/fred/FredSplit.rb +0 -84
- data/lib/fred/FredSplitPkg.rb +0 -180
- data/lib/fred/FredTest.rb +0 -606
- data/lib/fred/FredTrain.rb +0 -144
- data/lib/fred/PlotAndREval.rb +0 -480
- data/lib/fred/fred_config_data.rb +0 -185
- data/test/frprep/test_opt_parser.rb +0 -94
- data/test/functional/functional_test_helper.rb +0 -58
- data/test/functional/test_fred.rb +0 -47
- data/test/functional/test_frprep.rb +0 -99
- data/test/functional/test_rosy.rb +0 -40
@@ -1,322 +0,0 @@
|
|
1
|
-
###
# FredFeatureInfo
#
# Keeps a class-level registry of feature extractor classes and, per
# instance, the subset of those extractors that the experiment file asks for.
class FredFeatureInfo
  # Class variable: list of all known extractor classes.
  # Add to it using add_feature().
  @@extractors = []

  # Boolean. Set to true after the "unknown extractor" warnings have been
  # given once, so that later instances stay quiet.
  @@warned = false

  ###
  # Register a feature extractor class.
  #
  # class_name: Class object; it must respond to feature_name().
  #
  # Registering a class that is already known is a no-op, so loading the
  # extractor definitions twice does not create duplicate entries.
  #
  # Returns the list of registered extractor classes.
  def self.add_feature(class_name)
    @@extractors << class_name unless @@extractors.include?(class_name)
    @@extractors
  end

  ###
  # exp: experiment file object. Only exp.get_lf("feature") is used here;
  #      per the original comment it returns an array of pairs
  #      [feature group designator (string), options (array:string)].
  #
  # Collects the extractor classes whose feature_name() matches a designator
  # chosen by the user. Designators without a matching extractor are skipped,
  # with a warning on $stderr the first time any FredFeatureInfo is built.
  def initialize(exp)
    @exp = exp
    @features = []

    # No need to use the options here: the feature extractors can get
    # their options from the experiment object themselves.
    exp.get_lf("feature").each do |extractor_name, *_options|
      extractor = @@extractors.find { |e| e.feature_name == extractor_name }

      if extractor
        @features << extractor
      elsif !@@warned
        $stderr.puts "Warning: Could not find a feature extractor for #{extractor_name}: skipping."
      end
    end

    # Do not print warnings again if another FredFeatureInfo object is made.
    @@warned = true
  end

  ###
  # get_extractor_objects
  #
  # Returns a list of freshly instantiated feature extractor objects, one per
  # selected extractor class, each constructed with the experiment object.
  def get_extractor_objects
    @features.map { |feature_class| feature_class.new(@exp) }
  end
end
|
59
|
-
|
60
|
-
##################################
|
61
|
-
###
# FredFeatureExtractor
#
# Abstract base class for feature extractors. Subclasses override
# feature_name() and each_feature(), and call announce_me() in their class
# body to register themselves with FredFeatureInfo.
class FredFeatureExtractor
  ###
  # Feature name: the name by which you choose this feature
  # in the experiment file.
  #
  # Must be overridden; the base implementation raises RuntimeError.
  def self.feature_name
    raise "Overwrite me."
  end

  ###
  # Initialize with the Fred experiment file object.
  def initialize(exp)
    @exp = exp
  end

  ###
  # Compute features from meta-features.
  #
  # feature_hash: hash
  #   metafeature_label -> metafeatures
  #   string            -> array:string
  #
  # Implementations yield each feature as a string.
  # Must be overridden; the base implementation raises RuntimeError.
  def each_feature(feature_hash)
    raise "overwrite me"
  end

  ###
  # Register the receiving class with FredFeatureInfo, if that class is
  # loaded; otherwise do nothing.
  #
  # The class object itself is passed (not eval'd from its name), so this
  # also works for anonymous classes. The method is public: a `protected`
  # marker does not apply to singleton methods defined with `def self.`.
  def self.announce_me
    # No interface collector class available: nothing to announce to.
    return unless defined?(FredFeatureInfo)

    FredFeatureInfo.add_feature(self)
  end
end
|
103
|
-
|
104
|
-
#####
|
105
|
-
# context feature
|
106
|
-
###
# Context feature: one feature per context item, for every context size
# selected in the experiment file.
class FredContextFeatureExtractor < FredFeatureExtractor
  FredContextFeatureExtractor.announce_me

  # Name by which this feature is chosen in the experiment file.
  def self.feature_name
    "context"
  end

  ###
  # exp: Fred experiment file object.
  #
  # Builds @cxsizes, a hash whose keys are the metafeature labels
  # ("CX" + size) of the context sizes listed under feature/context,
  # for fast membership tests in each_feature.
  def initialize(exp)
    super(exp)

    @cxsizes = {}
    @exp.get_lf("feature", "context").each do |cxsize|
      @cxsizes["CX#{cxsize}"] = true
    end
  end

  ###
  # feature_hash: metafeature_label (string) -> metafeatures (array:string)
  #
  # For every metafeature whose label is a selected context size, yields the
  # label followed by the third '#'-separated field of each value. Values
  # containing five consecutive '#' are skipped. A value with fewer than
  # three fields yields the bare label instead of raising a TypeError.
  def each_feature(feature_hash)
    # Original comment gives the format as grf#word#lemma#pos#ne.
    # NOTE(review): the other context extractors read the same CX values as
    # word#lemma#pos#ne, where index 2 would be the POS - confirm which
    # format applies here before changing the index.
    lemma_index = 2

    feature_hash.each do |ftype, fvalues|
      # Only context features of a size chosen by the user are featurized.
      next unless @cxsizes[ftype]

      fvalues.each do |f|
        next if f.include?("#####")

        # Interpolation turns a missing field (nil) into an empty string.
        yield "#{ftype}#{f.split('#')[lemma_index]}"
      end
    end
  end
end
|
144
|
-
|
145
|
-
#####
|
146
|
-
# context feature: POS separately, small contexts only
|
147
|
-
###
# Context feature: POS separately, small contexts only.
class FredContextPOSFeatureExtractor < FredFeatureExtractor
  FredContextPOSFeatureExtractor.announce_me

  # Name by which this feature is chosen in the experiment file.
  def self.feature_name
    "context_pos"
  end

  ###
  # exp: Fred experiment file object.
  #
  # Builds @cxsizes, a hash whose keys are the metafeature labels
  # ("CX" + size) of those context sizes listed under feature/context
  # that are <= 10. Warns on $stderr if no such size exists.
  def initialize(exp)
    super(exp)

    @cxsizes = {}
    @exp.get_lf("feature", "context").each do |cxsize|
      @cxsizes["CX#{cxsize}"] = true if cxsize <= 10
    end

    return unless @cxsizes.empty?

    $stderr.puts "context_pos feature warning: will not be computed"
    $stderr.puts "as there is no context of size <= 10"
  end

  ###
  # feature_hash: metafeature_label (string) -> metafeatures (array:string)
  #
  # For every metafeature whose label is a selected (small) context size,
  # yields "POS" + label + the POS field of each value.
  #
  # String#split drops trailing empty fields, so a value such as
  # "word#lemma##" has no element at the POS index. In that case the feature
  # is yielded with an empty POS instead of raising a TypeError.
  def each_feature(feature_hash)
    # Value format: word#lemma#pos#ne
    pos_index = 2

    feature_hash.each do |ftype, fvalues|
      # Only context features of a size chosen by the user are featurized.
      next unless @cxsizes[ftype]

      fvalues.each do |f|
        yield "POS#{ftype}#{f.split('#')[pos_index]}"
      end
    end
  end
end
|
190
|
-
|
191
|
-
#####
|
192
|
-
# bigram/trigram feature
|
193
|
-
###
# Bigram/trigram feature, computed from the words next to the target.
class FredNgramFeatureExtractor < FredFeatureExtractor
  FredNgramFeatureExtractor.announce_me

  # Name by which this feature is chosen in the experiment file.
  def self.feature_name
    "ngram"
  end

  ###
  # exp: Fred experiment file object.
  #
  # Sets @cxsize to the first context size listed under feature/context
  # that is >= 2; the ngram features are computed from that context.
  # If there is none, @cxsize is nil and a warning is printed on $stderr.
  def initialize(exp)
    super(exp)

    @cxsize = @exp.get_lf("feature", "context").find { |cxsize| cxsize >= 2 }
    return if @cxsize

    $stderr.puts "Warning: no context of size >= 2, so"
    $stderr.puts "no ngram feature computed."
  end

  ###
  # feature_hash: metafeature_label (string) -> metafeatures (array:string)
  #
  # Yields up to four features built from the metafeature labelled
  # "CX" + @cxsize: lemma bigram (BLEM), POS bigram (BPOS), lemma trigram
  # (TLEM) and POS trigram (TPOS). Yields nothing when no usable context
  # size was found at construction time.
  def each_feature(feature_hash)
    # Without a context size there is nothing to compute, and the index
    # arithmetic below would fail on nil.
    return feature_hash unless @cxsize

    # Value format: word#lemma#pos#ne
    lemma_index = 1
    pos_index = 2

    # Offsets are relative to fvalues[@cxsize]. Per the original comment,
    # |fvalues| = 2*cxsize, i.e. cxsize is the length of a one-sided context
    # window, so offsets -1 and 0 are the items around the target and -2 is
    # the second item before it.
    ngram_specs = [
      [[-1, 0],     "BLEM", lemma_index], # bigram of lemmas
      [[-1, 0],     "BPOS", pos_index],   # bigram of POSs
      [[-2, -1, 0], "TLEM", lemma_index], # trigram of lemmas
      [[-2, -1, 0], "TPOS", pos_index]    # trigram of POSs
    ]
    wanted_label = "CX#{@cxsize}"

    feature_hash.each do |ftype, fvalues|
      next unless ftype == wanted_label

      ngram_specs.each do |offsets, label, subindex|
        items = offsets.map { |offset| fvalues[@cxsize + offset] }.compact
        # Only emit the ngram if entries exist for all the given indices.
        next unless items.length == offsets.length

        yield label + items.map { |item| item.split("#")[subindex] }.join
      end
    end
  end
end
|
248
|
-
|
249
|
-
|
250
|
-
#####
|
251
|
-
# syntax feature
|
252
|
-
###
# Syntax feature: the grammatical function of the "CH", "PA" and "SI"
# metafeatures.
class FredSynFeatureExtractor < FredFeatureExtractor
  FredSynFeatureExtractor.announce_me

  # Name by which this feature is chosen in the experiment file.
  def self.feature_name
    "syntax"
  end

  ###
  # feature_hash: metafeature_label (string) -> metafeatures (array:string)
  #
  # For every "CH", "PA" or "SI" metafeature, yields the label followed by
  # the grammatical-function field of each value. All other metafeatures
  # are ignored. A value without that field (e.g. an empty string) yields
  # the bare label instead of raising a TypeError.
  def each_feature(feature_hash)
    feature_hash.each do |ftype, fvalues|
      grf_index =
        case ftype
        when "CH", "PA"
          # The grammatical function is the first field.
          0
        when "SI"
          # Value format: parentlemma#grf#word#lemma#pos#ne
          1
        end

      # Not a syntactic metafeature.
      next unless grf_index

      fvalues.each do |f|
        yield "#{ftype}#{f.split('#')[grf_index]}"
      end
    end
  end
end
|
286
|
-
|
287
|
-
|
288
|
-
|
289
|
-
|
290
|
-
#####
|
291
|
-
# syntax-plus-headword feature
|
292
|
-
###
# Syntax-plus-headword feature: the "CH", "PA" and "SI" metafeatures with
# their lexical information kept.
class FredSynsemFeatureExtractor < FredFeatureExtractor
  FredSynsemFeatureExtractor.announce_me

  # Name by which this feature is chosen in the experiment file.
  def self.feature_name
    "synsem"
  end

  ###
  # feature_hash: metafeature_label (string) -> metafeatures (array:string)
  #
  # Yields label + "SEM" + value for "CH" and "PA" metafeatures, and
  # label + "SEM" + value-without-its-first-field for "SI" metafeatures.
  # All other metafeatures are ignored.
  def each_feature(feature_hash)
    feature_hash.each do |ftype, fvalues|
      case ftype
      when "CH", "PA"
        # Value format: grf#word#lemma#pos#ne - used unchanged.
        fvalues.each { |f| yield "#{ftype}SEM#{f}" }

      when "SI"
        # Value format: parentlemma#grf#word#lemma#pos#ne
        # Remove the parent lemma. drop(1) returns [] for an empty field
        # list, so an empty value yields "SISEM" instead of raising.
        fvalues.each do |f|
          yield "#{ftype}SEM#{f.split('#').drop(1).join('#')}"
        end
      end
      # Anything else is not a syntax feature.
    end
  end
end
|