shalmaneser-fred 1.2.0.rc4 → 1.2.rc5

Sign up to get free protection for your applications and to get access to all the features.
Files changed (68) hide show
  1. checksums.yaml +4 -4
  2. data/README.md +47 -18
  3. data/bin/fred +8 -3
  4. data/lib/fred/FredConventions.rb +190 -189
  5. data/lib/fred/abstract_context_provider.rb +246 -0
  6. data/lib/fred/abstract_fred_feature_access.rb +43 -0
  7. data/lib/fred/answer_key_access.rb +130 -0
  8. data/lib/fred/aux_keep_writers.rb +94 -0
  9. data/lib/fred/baseline.rb +153 -0
  10. data/lib/fred/context_provider.rb +55 -0
  11. data/lib/fred/feature_extractors/fred_context_feature_extractor.rb +48 -0
  12. data/lib/fred/feature_extractors/fred_context_pos_feature_extractor.rb +48 -0
  13. data/lib/fred/feature_extractors/fred_feature_extractor.rb +50 -0
  14. data/lib/fred/feature_extractors/fred_ngram_feature_extractor.rb +65 -0
  15. data/lib/fred/feature_extractors/fred_syn_feature_extractor.rb +33 -0
  16. data/lib/fred/feature_extractors/fred_synsem_feature_extractor.rb +32 -0
  17. data/lib/fred/feature_extractors.rb +5 -0
  18. data/lib/fred/file_zipped.rb +43 -0
  19. data/lib/fred/find_all_targets.rb +94 -0
  20. data/lib/fred/find_targets_from_frames.rb +92 -0
  21. data/lib/fred/fred.rb +43 -40
  22. data/lib/fred/fred_error.rb +15 -0
  23. data/lib/fred/fred_eval.rb +311 -0
  24. data/lib/fred/fred_feature_access.rb +420 -0
  25. data/lib/fred/fred_feature_info.rb +56 -0
  26. data/lib/fred/fred_featurize.rb +525 -0
  27. data/lib/fred/fred_parameters.rb +190 -0
  28. data/lib/fred/fred_split.rb +86 -0
  29. data/lib/fred/fred_split_pkg.rb +189 -0
  30. data/lib/fred/fred_test.rb +571 -0
  31. data/lib/fred/fred_train.rb +125 -0
  32. data/lib/fred/grammatical_function_access.rb +63 -0
  33. data/lib/fred/md5.rb +6 -0
  34. data/lib/fred/meta_feature_access.rb +185 -0
  35. data/lib/fred/non_contiguous_context_provider.rb +532 -0
  36. data/lib/fred/opt_parser.rb +182 -161
  37. data/lib/fred/plot_and_r_eval.rb +486 -0
  38. data/lib/fred/single_sent_context_provider.rb +76 -0
  39. data/lib/fred/slide_var.rb +148 -0
  40. data/lib/fred/targets.rb +136 -0
  41. data/lib/fred/toggle_var.rb +61 -0
  42. data/lib/fred/word_lemma_pos_ne.rb +51 -0
  43. data/lib/fred/write_features_binary.rb +95 -0
  44. data/lib/fred/write_features_nary.rb +51 -0
  45. data/lib/fred/write_features_nary_or_binary.rb +51 -0
  46. data/lib/shalmaneser/fred.rb +1 -0
  47. metadata +57 -30
  48. data/lib/fred/Baseline.rb +0 -150
  49. data/lib/fred/FileZipped.rb +0 -31
  50. data/lib/fred/FredBOWContext.rb +0 -877
  51. data/lib/fred/FredDetermineTargets.rb +0 -319
  52. data/lib/fred/FredEval.rb +0 -312
  53. data/lib/fred/FredFeatureExtractors.rb +0 -322
  54. data/lib/fred/FredFeatures.rb +0 -1061
  55. data/lib/fred/FredFeaturize.rb +0 -602
  56. data/lib/fred/FredNumTrainingSenses.rb +0 -27
  57. data/lib/fred/FredParameters.rb +0 -402
  58. data/lib/fred/FredSplit.rb +0 -84
  59. data/lib/fred/FredSplitPkg.rb +0 -180
  60. data/lib/fred/FredTest.rb +0 -606
  61. data/lib/fred/FredTrain.rb +0 -144
  62. data/lib/fred/PlotAndREval.rb +0 -480
  63. data/lib/fred/fred_config_data.rb +0 -185
  64. data/test/frprep/test_opt_parser.rb +0 -94
  65. data/test/functional/functional_test_helper.rb +0 -58
  66. data/test/functional/test_fred.rb +0 -47
  67. data/test/functional/test_frprep.rb +0 -99
  68. data/test/functional/test_rosy.rb +0 -40
@@ -1,322 +0,0 @@
###
# FredFeatureInfo
#
# Keeps a class-wide registry of feature extractor classes and, per object,
# the subset of those extractors that the experiment file asks for.
class FredFeatureInfo
  ###
  # class variable:
  # list of all known extractor classes
  # add to it using add_feature()
  @@extractors = []

  # boolean. set to true after warning messages have been given once
  @@warned = false

  ###
  # Register an extractor class with the registry.
  #
  # class_name: Class object. It must respond to feature_name(), and
  #             new(exp) must build an extractor object.
  #
  # Registering a class that is already known is a no-op, so loading an
  # extractor file twice does not leave duplicate entries behind.
  #
  # returns: the list of all registered extractor classes
  def self.add_feature(class_name)
    @@extractors << class_name unless @@extractors.include?(class_name)
    @@extractors
  end

  ###
  # Select the registered extractors named in the experiment file.
  #
  # exp: experiment file object. exp.get_lf("feature") is iterated; each
  #      entry is destructured as [feature group designator, *options].
  #
  # A designator without a matching extractor is skipped. A warning for it
  # is written to $stderr, but only while the first FredFeatureInfo object
  # is being built.
  def initialize(exp)
    @exp = exp
    @features = []

    # The options are not needed here:
    # the feature extractors can get their options themselves.
    exp.get_lf("feature").each do |extractor_name, *_options|
      extractor = @@extractors.find { |e| e.feature_name == extractor_name }

      if extractor
        @features << extractor
      elsif !@@warned
        $stderr.puts "Warning: Could not find a feature extractor for #{extractor_name}: skipping."
      end
    end

    # do not print warnings again if another FredFeatureInfo object is made
    @@warned = true
  end

  ###
  # get_extractor_objects
  #
  # returns a list of freshly built feature extractor objects, one per
  # selected extractor class, each initialized with the experiment object
  def get_extractor_objects
    @features.map { |feature_class| feature_class.new(@exp) }
  end
end

##################################
# FredFeatureExtractor
#
# Abstract base class of the Fred feature extractors. A subclass overrides
# feature_name() and each_feature(), and calls announce_me() from its class
# body to register itself with FredFeatureInfo.
class FredFeatureExtractor
  ###
  # feature name:
  # name by which you choose this feature
  # in the experiment file
  #
  # raises RuntimeError unless overridden by the subclass
  def self.feature_name
    raise "Overwrite me."
  end

  ###
  # initialize with Fred experiment file object
  def initialize(exp)
    @exp = exp
  end

  ###
  # compute features from meta-features
  #
  # argument: hash
  # metafeature_label -> metafeatures
  # string -> array:string
  #
  # yields each feature as a string
  #
  # raises RuntimeError unless overridden by the subclass
  def each_feature(feature_hash)
    raise "overwrite me"
  end

  ###
  # Register the receiving class with FredFeatureInfo, if that class exists.
  # Without FredFeatureInfo this silently does nothing.
  #
  # This method is deliberately public: subclasses invoke it with an explicit
  # receiver from their class body. (A `protected` keyword placed before a
  # `def self.` method does not change its visibility.)
  #
  # The class object itself is registered, so this also works for classes
  # that have no name.
  def self.announce_me
    return unless defined?(FredFeatureInfo)

    FredFeatureInfo.add_feature(self)
  end
end

#####
# context feature:
# one feature per context entry, for each context size chosen by the user
class FredContextFeatureExtractor < FredFeatureExtractor
  announce_me

  # name of this feature group in the experiment file
  def self.feature_name
    "context"
  end

  ###
  # exp: Fred experiment file object
  #
  # Remembers the metafeature labels ("CX" followed by the size) of all
  # context sizes listed under feature/context in the experiment file.
  def initialize(exp)
    super

    # label -> true, so that membership can be tested by lookup
    @cxsizes = {}
    @exp.get_lf("feature", "context").each do |size|
      @cxsizes["CX#{size}"] = true
    end
  end

  ###
  # feature_hash: metafeature label -> array of metafeature strings
  #
  # For every label that belongs to a chosen context size, yields the label
  # followed by the third "#"-separated field of each entry. Entries that
  # contain "#####" are skipped.
  def each_feature(feature_hash)
    # The original comment gives the layout as grf#word#lemma#pos#ne,
    # which puts the lemma in field 2.
    # NOTE(review): the other extractors that read "CX" metafeatures describe
    # the layout as word#lemma#pos#ne (lemma = 1, pos = 2) -- confirm which
    # layout the context metafeatures really have.
    lemma_field = 2

    feature_hash.each do |label, entries|
      next unless @cxsizes[label]

      entries.each do |entry|
        next if entry =~ /#####/

        yield label + entry.split("#")[lemma_field]
      end
    end
  end
end

#####
# context feature: POS separately, small contexts only
class FredContextPOSFeatureExtractor < FredFeatureExtractor
  announce_me

  # name of this feature group in the experiment file
  def self.feature_name
    "context_pos"
  end

  ###
  # exp: Fred experiment file object
  #
  # Remembers the metafeature labels ("CX" followed by the size) of those
  # context sizes under feature/context that are at most 10. Writes a
  # warning to $stderr if no such size exists.
  def initialize(exp)
    super

    # label -> true, so that membership can be tested by lookup
    @cxsizes = {}
    @exp.get_lf("feature", "context").each do |size|
      next unless size <= 10

      @cxsizes["CX#{size}"] = true
    end

    return unless @cxsizes.empty?

    $stderr.puts "context_pos feature warning: will not be computed"
    $stderr.puts "as there is no context of size <= 10"
  end

  ###
  # feature_hash: metafeature label -> array of metafeature strings
  #
  # For every label that belongs to a remembered context size, yields
  # "POS", the label and the third "#"-separated field of each entry.
  def each_feature(feature_hash)
    # entry layout according to the original comment: word#lemma#pos#ne
    pos_field = 2

    feature_hash.each do |label, entries|
      next unless @cxsizes[label]

      entries.each do |entry|
        yield "POS" + label + entry.split("#")[pos_field]
      end
    end
  end
end

#####
# bigram/trigram feature
class FredNgramFeatureExtractor < FredFeatureExtractor
  announce_me

  # name of this feature group in the experiment file
  def self.feature_name
    "ngram"
  end

  ###
  # exp: Fred experiment file object
  #
  # Picks the context size from which the ngram features are computed:
  # the first size listed under feature/context that is at least 2.
  # If there is none, a warning is written to $stderr and @cxsize stays nil.
  def initialize(exp)
    super(exp)

    @cxsize = @exp.get_lf("feature", "context").detect { |cxsize| cxsize >= 2 }

    unless @cxsize
      $stderr.puts "Warning: no context of size >= 2, so"
      $stderr.puts "no ngram feature computed."
    end
  end

  ###
  # feature_hash: metafeature label -> array of metafeature strings
  #
  # Yields the lemma bigram, POS bigram, lemma trigram and POS trigram
  # around the target, each prefixed with BLEM/BPOS/TLEM/TPOS. An ngram is
  # only yielded if every entry it needs is present.
  #
  # returns feature_hash
  def each_feature(feature_hash)
    # No usable context size was configured (initialize has warned about
    # it): there is nothing to compute, and @cxsize must not be used as
    # an array offset.
    return feature_hash unless @cxsize

    # entry layout according to the original comment: word#lemma#pos#ne
    lemma_index = 1
    pos_index = 2

    # metafeature label of the context the ngrams are taken from
    context_label = "CX" + @cxsize.to_s

    # |fvalues| = 2*cxsize, that is, cxsize describes
    # the length of a one-sided context window.
    # The bigram of features around the target
    # concerns fvalues[cxsize-1] and fvalues[cxsize];
    # the trigram of two words before, one word after includes
    # fvalues[cxsize-2], fvalues[cxsize-1] and fvalues[cxsize].
    ngrams = [
      [[-1, 0], "BLEM", lemma_index],     # bigram of lemmas
      [[-1, 0], "BPOS", pos_index],       # bigram of POSs
      [[-2, -1, 0], "TLEM", lemma_index], # trigram of lemmas
      [[-2, -1, 0], "TPOS", pos_index]    # trigram of POSs
    ]

    feature_hash.each do |ftype, fvalues|
      next unless ftype == context_label

      ngrams.each do |f_indices, label, subindex|
        fs = f_indices.map { |i| fvalues[@cxsize + i] }.compact
        # skip the ngram unless entries were found for all the given indices
        next unless fs.length == f_indices.length

        yield label + fs.map { |f| f.split("#")[subindex] }.join
      end
    end
  end
end


#####
# syntax feature
class FredSynFeatureExtractor < FredFeatureExtractor
  announce_me

  # name of this feature group in the experiment file
  def self.feature_name
    "syntax"
  end

  ###
  # feature_hash: metafeature label -> array of metafeature strings
  #
  # For the labels "CH", "PA" and "SI", yields the label followed by the
  # grammatical function field of each entry. Other labels are ignored:
  # they are not syntactic metafeatures.
  def each_feature(feature_hash)
    feature_hash.each do |label, entries|
      # position of the grammatical function among the "#"-separated fields
      grf_position =
        case label
        when "CH", "PA"
          0
        when "SI"
          # parentlemma#grf#word#lemma#pos#ne
          1
        end
      next if grf_position.nil?

      entries.each do |entry|
        yield label + entry.split("#")[grf_position]
      end
    end
  end
end




#####
# syntax-plus-headword feature
class FredSynsemFeatureExtractor < FredFeatureExtractor
  announce_me

  # name of this feature group in the experiment file
  def self.feature_name
    "synsem"
  end

  ###
  # feature_hash: metafeature label -> array of metafeature strings
  #
  # For "CH" and "PA", yields label + "SEM" + the complete entry.
  # For "SI", yields label + "SEM" + the entry without its first
  # "#"-separated field. Other labels are ignored: they are not
  # syntax features.
  def each_feature(feature_hash)
    feature_hash.each do |label, entries|
      prefix = label + "SEM"

      case label
      when "CH", "PA"
        # grf#word#lemma#pos#ne
        entries.each do |entry|
          yield prefix + entry
        end
      when "SI"
        # parentlemma#grf#word#lemma#pos#ne
        # remove parent lemma
        entries.each do |entry|
          fields = entry.split("#")
          yield prefix + fields[1..-1].join("#")
        end
      end
    end
  end
end