shalmaneser 1.2.0.rc4 → 1.2.rc5

Sign up to get free protection for your applications and to get access to all the features.
Files changed (115) hide show
  1. checksums.yaml +4 -4
  2. data/README.md +47 -18
  3. data/bin/shalmaneser +8 -2
  4. data/doc/index.md +1 -0
  5. data/lib/shalmaneser/opt_parser.rb +68 -67
  6. metadata +49 -119
  7. data/bin/fred +0 -16
  8. data/bin/frprep +0 -34
  9. data/bin/rosy +0 -17
  10. data/lib/common/AbstractSynInterface.rb +0 -1229
  11. data/lib/common/Counter.rb +0 -18
  12. data/lib/common/EnduserMode.rb +0 -27
  13. data/lib/common/Eval.rb +0 -480
  14. data/lib/common/FixSynSemMapping.rb +0 -196
  15. data/lib/common/Graph.rb +0 -345
  16. data/lib/common/ISO-8859-1.rb +0 -24
  17. data/lib/common/ML.rb +0 -186
  18. data/lib/common/Mallet.rb +0 -236
  19. data/lib/common/Maxent.rb +0 -229
  20. data/lib/common/Optimise.rb +0 -195
  21. data/lib/common/Parser.rb +0 -213
  22. data/lib/common/RegXML.rb +0 -269
  23. data/lib/common/RosyConventions.rb +0 -171
  24. data/lib/common/STXmlTerminalOrder.rb +0 -194
  25. data/lib/common/SalsaTigerRegXML.rb +0 -2347
  26. data/lib/common/SalsaTigerXMLHelper.rb +0 -99
  27. data/lib/common/SynInterfaces.rb +0 -282
  28. data/lib/common/TabFormat.rb +0 -721
  29. data/lib/common/Tiger.rb +0 -1448
  30. data/lib/common/Timbl.rb +0 -144
  31. data/lib/common/Tree.rb +0 -61
  32. data/lib/common/config_data.rb +0 -470
  33. data/lib/common/config_format_element.rb +0 -220
  34. data/lib/common/headz.rb +0 -338
  35. data/lib/common/option_parser.rb +0 -13
  36. data/lib/common/prep_config_data.rb +0 -62
  37. data/lib/common/prep_helper.rb +0 -1330
  38. data/lib/common/ruby_class_extensions.rb +0 -310
  39. data/lib/db/db_interface.rb +0 -48
  40. data/lib/db/db_mysql.rb +0 -145
  41. data/lib/db/db_sqlite.rb +0 -280
  42. data/lib/db/db_table.rb +0 -239
  43. data/lib/db/db_wrapper.rb +0 -176
  44. data/lib/db/sql_query.rb +0 -243
  45. data/lib/ext/maxent/Classify.class +0 -0
  46. data/lib/ext/maxent/Train.class +0 -0
  47. data/lib/fred/Baseline.rb +0 -150
  48. data/lib/fred/FileZipped.rb +0 -31
  49. data/lib/fred/FredBOWContext.rb +0 -877
  50. data/lib/fred/FredConventions.rb +0 -232
  51. data/lib/fred/FredDetermineTargets.rb +0 -319
  52. data/lib/fred/FredEval.rb +0 -312
  53. data/lib/fred/FredFeatureExtractors.rb +0 -322
  54. data/lib/fred/FredFeatures.rb +0 -1061
  55. data/lib/fred/FredFeaturize.rb +0 -602
  56. data/lib/fred/FredNumTrainingSenses.rb +0 -27
  57. data/lib/fred/FredParameters.rb +0 -402
  58. data/lib/fred/FredSplit.rb +0 -84
  59. data/lib/fred/FredSplitPkg.rb +0 -180
  60. data/lib/fred/FredTest.rb +0 -606
  61. data/lib/fred/FredTrain.rb +0 -144
  62. data/lib/fred/PlotAndREval.rb +0 -480
  63. data/lib/fred/fred.rb +0 -47
  64. data/lib/fred/fred_config_data.rb +0 -185
  65. data/lib/fred/md5.rb +0 -23
  66. data/lib/fred/opt_parser.rb +0 -250
  67. data/lib/frprep/Ampersand.rb +0 -39
  68. data/lib/frprep/CollinsInterface.rb +0 -1165
  69. data/lib/frprep/Counter.rb +0 -18
  70. data/lib/frprep/FNCorpusXML.rb +0 -643
  71. data/lib/frprep/FNDatabase.rb +0 -144
  72. data/lib/frprep/FrameXML.rb +0 -513
  73. data/lib/frprep/Graph.rb +0 -345
  74. data/lib/frprep/MiniparInterface.rb +0 -1388
  75. data/lib/frprep/RegXML.rb +0 -269
  76. data/lib/frprep/STXmlTerminalOrder.rb +0 -194
  77. data/lib/frprep/SleepyInterface.rb +0 -384
  78. data/lib/frprep/TntInterface.rb +0 -44
  79. data/lib/frprep/TreetaggerInterface.rb +0 -327
  80. data/lib/frprep/do_parses.rb +0 -143
  81. data/lib/frprep/frprep.rb +0 -693
  82. data/lib/frprep/interfaces/berkeley_interface.rb +0 -372
  83. data/lib/frprep/interfaces/stanford_interface.rb +0 -353
  84. data/lib/frprep/interpreters/berkeley_interpreter.rb +0 -22
  85. data/lib/frprep/interpreters/stanford_interpreter.rb +0 -22
  86. data/lib/frprep/one_parsed_file.rb +0 -28
  87. data/lib/frprep/opt_parser.rb +0 -94
  88. data/lib/frprep/ruby_class_extensions.rb +0 -310
  89. data/lib/rosy/AbstractFeatureAndExternal.rb +0 -242
  90. data/lib/rosy/ExternalConfigData.rb +0 -58
  91. data/lib/rosy/FailedParses.rb +0 -130
  92. data/lib/rosy/FeatureInfo.rb +0 -242
  93. data/lib/rosy/GfInduce.rb +0 -1115
  94. data/lib/rosy/GfInduceFeature.rb +0 -148
  95. data/lib/rosy/InputData.rb +0 -294
  96. data/lib/rosy/RosyConfusability.rb +0 -338
  97. data/lib/rosy/RosyEval.rb +0 -465
  98. data/lib/rosy/RosyFeatureExtractors.rb +0 -1609
  99. data/lib/rosy/RosyFeaturize.rb +0 -281
  100. data/lib/rosy/RosyInspect.rb +0 -336
  101. data/lib/rosy/RosyIterator.rb +0 -478
  102. data/lib/rosy/RosyPhase2FeatureExtractors.rb +0 -230
  103. data/lib/rosy/RosyPruning.rb +0 -165
  104. data/lib/rosy/RosyServices.rb +0 -744
  105. data/lib/rosy/RosySplit.rb +0 -232
  106. data/lib/rosy/RosyTask.rb +0 -19
  107. data/lib/rosy/RosyTest.rb +0 -829
  108. data/lib/rosy/RosyTrain.rb +0 -234
  109. data/lib/rosy/RosyTrainingTestTable.rb +0 -787
  110. data/lib/rosy/TargetsMostFrequentFrame.rb +0 -60
  111. data/lib/rosy/View.rb +0 -418
  112. data/lib/rosy/opt_parser.rb +0 -379
  113. data/lib/rosy/rosy.rb +0 -78
  114. data/lib/rosy/rosy_config_data.rb +0 -121
  115. data/lib/shalmaneser/version.rb +0 -3
@@ -1,312 +0,0 @@
1
- # FredEval
2
- # Katrin Erk April 05
3
- #
4
- # Frame disambiguation system: evaluate classification results
5
- #
6
- # While the other main classes of Fred just provide a new() method
7
- # and a compute() method,
8
- # the FredEval class also provides access methods to all the
9
- # individual evaluation results and allows for a flag that
10
- # suppresses evaluation output to a file --
11
- # such that this package can also be used by external systems that
12
- # wish to evaluate Fred.
13
- #
14
- # Inherits from the Eval class that is not Fred-specific
15
-
16
- # Salsa packages
17
- require "common/Eval"
18
- require "common/ruby_class_extensions"
19
-
20
- # Fred packages
21
- require "fred/fred_config_data"
22
- require "fred/FredConventions"
23
- require "fred/FredFeatures"
24
- require "fred/FredDetermineTargets"
25
-
26
# FredEval: evaluates Fred's classification results against the answer keys.
#
# Subclass of Eval. This class supplies the constructor plus the two
# iteration hooks each_group() and each_instance(); everything else is
# inherited.
class FredEval < Eval

  ###
  # new
  #
  # evaluate runtime options and announce the task
  #
  # exp_obj: FredConfigData object
  # options: hash: runtime option name (string) => value (string)
  #          "--logID" sets the split ID,
  #          "--printLog" switches on writing of an evaluation log file
  #
  # Exits the process with status 1 if the list of known targets
  # cannot be read.
  def initialize(exp_obj, options)

    in_enduser_mode_unavailable()

    @exp = exp_obj

    ###
    # evaluate runtime options
    @split_id = nil
    logfilename = nil

    options.each_pair { |opt, arg|
      case opt
      when "--logID"
        @split_id = arg
      when "--printLog"
        logfilename = fred_dirname(@exp, "eval", "log", "new") +
                      "eval_logfile.txt"
      else
        # case of unknown arguments has been dealt with by fred.rb
      end
    }

    ###
    # make outfile name
    outfilename = fred_dirname(@exp, "eval", "eval", "new") +
                  "eval.txt"

    ###
    # do we regard all senses as assigned,
    # as long as they surpass some threshold?
    # if we are doing multilabel evaluation, we need the full list of senses
    @threshold = @exp.get("assignment_confidence_threshold")
    @target_obj = Targets.new(@exp, nil, "r")
    unless @target_obj.targets_okay
      # error during initialization
      $stderr.puts "Error: Could not read list of known targets, bailing out."
      exit 1
    end

    handle_multilabel = @exp.get("handle_multilabel")
    @multiple_senses_assigned = (@threshold || handle_multilabel == "keep") ? true : false

    ###
    # initialize abstract class behind me
    if @multiple_senses_assigned
      # we are possibly assigning more than one sense: do precision/recall
      # instead of accuracy:
      # "true" is what "this sense has been assigned" is mapped to
      # in each_instance().
      super(outfilename, logfilename, "true")
    else
      super(outfilename, logfilename)
    end

    # what is being done with instances with multiple sense labels?
    @handle_multilabel = handle_multilabel

    ###
    # announce the task
    $stderr.puts "---------"
    $stderr.print "Fred experiment #{@exp.get("experiment_ID")}: Evaluating classifiers"
    # fixed: this used to test @split_dir, which is never assigned,
    # so the split ID was never announced
    if @split_id
      $stderr.puts " using split with ID #{@split_id}"
    else
      $stderr.puts
    end
    if @multiple_senses_assigned
      $stderr.puts "Allowing for the assignment of multiple senses,"
      $stderr.puts "computing precision and recall against the full sense list of a lemma."
    end
    $stderr.puts "Writing result to #{fred_dirname(@exp, "eval", "eval")}"
    $stderr.puts "---------"
  end

  #####
  protected

  ###
  # each_group
  #
  # yield each group name in turn
  # in our case, group names are lemmas
  #
  # also, set object-global variables in such a way
  # that the elements of this group can be read:
  #   @classfile:  open classifier output file for the lemma,
  #                or nil if it could not be opened
  #   @goldreader: AnswerKeyAccess object for the lemma
  #   @all_senses: all senses of the lemma (multilabel evaluation only),
  #                else nil
  #
  # The classifier output file is closed again as soon as the block
  # returns, so each_instance() has to be called from within the block.
  # NOTE(review): assumes Eval calls each_instance() inside the
  # each_group() block -- confirm against common/Eval.
  def each_group()

    # access to classifier output files
    output_dir = fred_dirname(@exp, "output", "tab")

    # iterate through instance files
    @target_obj.get_lemmas().sort().each { |lemma|
      # progress report
      if @exp.get("verbose")
        $stderr.puts "Evaluating " + lemma
      end

      # file with classification results
      begin
        @classfile = File.new(output_dir + fred_result_filename(lemma))
      rescue
        # no classification results
        @classfile = nil
      end

      # file with answers:
      # maybe we need to apply a split first
      if @split_id
        @goldreader = AnswerKeyAccess.new(@exp, "train", lemma, "r", @split_id, "test")
      else
        @goldreader = AnswerKeyAccess.new(@exp, "test", lemma, "r")
      end

      # doing multilabel evaluation?
      # then we need a list of all senses
      if @multiple_senses_assigned
        @all_senses = @target_obj.get_senses(lemma)
      else
        @all_senses = nil
      end

      begin
        yield lemma
      ensure
        # do not leak one file handle per lemma
        if @classfile
          @classfile.close()
          @classfile = nil
        end
      end
    }
  end

  ###
  # each_instance
  #
  # given a lemma name, yield each instance of this lemma in turn,
  # or rather: yield pairs [gold_class(string), assigned_class(string)]
  #
  # relies on each_group() having set the appropriate readers
  # <@goldreader> and <@classfile>
  #
  # lemma: string: lemma name (not read here; the readers set by
  #        each_group() determine what is evaluated)
  #
  # raises a RuntimeError if a threshold is set but the list of all
  # senses is missing
  def each_instance(lemma)

    # watch out for repeated instances
    # which may occur if handle_multilabel = repeat.
    # Only yield them once to avoid re-evaluating multi-label instances
    #
    # instance_ids_seen: hash target_ids -> true/nil
    instance_ids_seen = {}

    # read gold file and classifier output file in parallel
    @goldreader.each { |_lemma, _pos, target_ids, _sid, senses_gold, _transformed_gold_senses|

      # classline: format
      # (label confidence)*
      # such that the label with the highest confidence is first
      #
      # the line is read even for instances that are skipped below,
      # so that the two files stay aligned
      classline = @classfile ? @classfile.gets() : nil
      classline ||= ""

      # have we done this same instance previously?
      next if instance_ids_seen[target_ids]
      # instance not seen previously, but mark as seen now.
      instance_ids_seen[target_ids] = true

      # senses assigned: list of pairs [senselist, confidence]
      # where senselist is an array of sense strings
      senses_assigned = assigned_senses_from_classline(classline)

      if @threshold
        # multiple senses assigned, and
        # regard as assigned everything above a given threshold
        senses_assigned = senses_passing_threshold(senses_assigned)

        unless @all_senses
          raise "Shouldn't be here"
        end

        # for each sense out of the list of all senses:
        # yield a pair of [applies, has been assigned]
        # both 'applies' and 'has been assigned' will be
        # a string of either 'true' or 'false'
        # assignment is accurate if both are the same
        @all_senses.each { |sense_of_lemma|
          gold_class = senses_gold.include?(sense_of_lemma).to_s()
          assigned_class = senses_assigned.include?(sense_of_lemma).to_s()
          yield [gold_class, assigned_class]
        }

      else
        # regard only one sense as assigned at a time
        # count as correct if the list of gold classes
        # contains the main assigned class
        # (relatively lenient evaluation)

        # nothing to yield if no sense was assigned
        next if senses_assigned.empty?

        # actually assigned class: only the one with the
        # maximum confidence
        max_senselist = senses_assigned.max { |a, b|
          a.last() <=> b.last()
        }.first()

        max_senselist.each { |single_sense|
          gold_class = senses_gold.include?(single_sense).to_s()
          yield [gold_class, "true"]
        }
      end
    }
  end

  ###
  # assigned_senses_from_classline
  #
  # parse one line of classifier output, format (label confidence)*
  #
  # returns: list of pairs [senselist, confidence(float)]
  # where senselist is an array of sense strings.
  # A trailing label without a confidence is dropped.
  def assigned_senses_from_classline(classline) # string
    senses_assigned = []
    classline.split().each_slice(2) { |label, confidence|
      next if confidence.nil?

      if @handle_multilabel == "join"
        # split up joined senses
        senselist = fred_split_sense(label)
      else
        senselist = [label]
      end
      senses_assigned << [senselist, confidence.to_f()]
    }
    senses_assigned
  end

  ###
  # senses_passing_threshold
  #
  # given a list of pairs [senselist, confidence],
  # returns: array of all senses whose confidence is at least @threshold
  #
  # in the case of "join", one sense may have several confidence levels,
  # one on its own and one in a joined sense:
  # each sense is judged by the maximum of its confidence levels.
  def senses_passing_threshold(senses_assigned)
    best_confidence = {}
    senses_assigned.each { |senses, confidence|
      senses.each { |s|
        # watch out: confidence may be smaller than zero,
        # so test for presence rather than comparing against 0
        if best_confidence.key?(s)
          best_confidence[s] = [best_confidence[s], confidence].max()
        else
          best_confidence[s] = confidence
        end
      }
    }

    # retain only the senses, not the confidence levels
    best_confidence.keys().select { |s| best_confidence[s] >= @threshold }
  end

end
@@ -1,322 +0,0 @@
1
# Collects the feature extractor classes that have announced themselves
# and picks out the ones named in the experiment file.
class FredFeatureInfo
  # class variable: every extractor class registered via add_feature()
  @@extractors = []

  # class variable: becomes true once the first FredFeatureInfo object
  # has been built; "extractor not found" warnings are printed only
  # while it is still false
  @@warned = false

  ###
  # add_feature
  #
  # register a feature extractor class
  #
  # class_name: Class object
  def self.add_feature(class_name)
    @@extractors.push(class_name)
  end

  ###
  # new
  #
  # exp: experiment file object; get_lf("feature") is expected to list
  #      the user-chosen feature groups, the designator (string) being
  #      the first element of each entry
  #
  # Designators without a registered extractor are skipped.
  def initialize(exp)
    @exp = exp
    @features = []

    # the options following the designator are not needed here:
    # the feature extractors fetch their options themselves
    exp.get_lf("feature").each do |designator, *_options|
      chosen = @@extractors.find { |candidate| candidate.feature_name() == designator }

      if chosen
        @features << chosen
      elsif !@@warned
        $stderr.puts "Warning: Could not find a feature extractor for #{designator}: skipping."
      end
    end

    # do not print warnings again if another FredFeatureInfo object is made
    @@warned = true
  end

  ###
  # get_extractor_objects
  #
  # returns: one freshly built extractor object per chosen extractor
  # class, each initialized with the experiment file object
  def get_extractor_objects
    @features.map { |extractor_class| extractor_class.new(@exp) }
  end
end
59
-
60
- ##################################3
61
# Abstract base class of the Fred feature extractors.
#
# A subclass overrides feature_name() and each_feature(), and calls
# announce_me() from its class body to register itself.
class FredFeatureExtractor
  ###
  # feature name:
  # name by which you choose this feature
  # in the experiment file
  #
  # raises a RuntimeError unless overridden
  def self.feature_name
    raise "Overwrite me."
  end

  ###
  # initialize with Fred experiment file object
  def initialize(exp)
    @exp = exp
  end

  ###
  # compute features from meta-features
  #
  # argument: hash
  #  metafeature_label -> metafeatures
  #  string            -> array:string
  #
  # yields each feature as a string
  #
  # raises a RuntimeError unless overridden
  def each_feature(feature_hash)
    raise "overwrite me"
  end

  ###
  # announce_me
  #
  # register the receiving class with FredFeatureInfo,
  # provided that class has been loaded; otherwise do nothing.
  #
  # This is a singleton method, so a visibility marker such as
  # 'protected' would have no effect on it. It has to stay public
  # because subclasses call it with an explicit receiver.
  #
  # The class object itself is registered (rather than looking the
  # class up again by name), so classes without a name work too.
  def self.announce_me
    return unless Object.const_defined?(:FredFeatureInfo)

    # yup, we have a class to which we can announce ourselves
    FredFeatureInfo.add_feature(self)
  end
end
103
-
104
- #####
105
- # context feature
106
# Context feature, chosen in the experiment file as "context".
class FredContextFeatureExtractor < FredFeatureExtractor
  FredContextFeatureExtractor.announce_me()

  # name by which this feature is chosen in the experiment file
  def self.feature_name
    "context"
  end

  ###
  # exp: Fred experiment file object
  def initialize(exp)
    super(exp)

    # @cxsizes: metafeature labels ("CX" + context size) of all context
    # sizes chosen as features, kept as hash keys for fast lookup
    @cxsizes = {}
    @exp.get_lf("feature", "context").each do |size|
      @cxsizes["CX" + size.to_s()] = true
    end
  end

  ###
  # each_feature
  #
  # feature_hash: hash metafeature_label(string) -> metafeatures(array:string)
  #
  # For each metafeature with a label of a chosen context size, yields
  # the label followed by field 2 of the "#"-separated metafeature.
  # Metafeatures containing "#####" are skipped.
  #
  # NOTE(review): the original comment gives the metafeature format as
  # grf#word#lemma#pos#ne (lemma = field 2), whereas
  # FredContextPOSFeatureExtractor and FredNgramFeatureExtractor document
  # word#lemma#pos#ne, under which field 2 would be the POS -- confirm
  # the actual format of the CX metafeatures.
  def each_feature(feature_hash)
    lemma_field = 2

    feature_hash.each do |label, metafeatures|
      # only context features of a size chosen by the user
      next unless @cxsizes[label]

      metafeatures.each do |metafeature|
        next if metafeature =~ /#####/

        yield label + metafeature.split("#")[lemma_field]
      end
    end
  end
end
144
-
145
- #####
146
- # context feature: POS separately, small contexts only
147
# Context feature on parts of speech only, restricted to small contexts;
# chosen in the experiment file as "context_pos".
class FredContextPOSFeatureExtractor < FredFeatureExtractor
  FredContextPOSFeatureExtractor.announce_me()

  # name by which this feature is chosen in the experiment file
  def self.feature_name
    "context_pos"
  end

  ###
  # exp: Fred experiment file object
  #
  # Prints a warning if none of the configured context sizes is <= 10.
  def initialize(exp)
    super(exp)

    # @cxsizes: metafeature labels ("CX" + context size) of all chosen
    # context sizes up to 10, kept as hash keys for fast lookup
    @cxsizes = {}
    @exp.get_lf("feature", "context").each do |size|
      next unless size <= 10

      @cxsizes["CX" + size.to_s()] = true
    end

    if @cxsizes.empty?
      $stderr.puts "context_pos feature warning: will not be computed"
      $stderr.puts "as there is no context of size <= 10"
    end
  end

  ###
  # each_feature
  #
  # feature_hash: hash metafeature_label(string) -> metafeatures(array:string)
  #
  # For each metafeature with a label of a retained context size, yields
  # "POS" + label + field 2 of the "#"-separated metafeature
  # (format according to the original comment: word#lemma#pos#ne).
  #
  # NOTE(review): unlike FredContextFeatureExtractor, metafeatures
  # containing "#####" are not skipped here; if field 2 is missing,
  # the string concatenation raises a TypeError -- confirm whether
  # such metafeatures can occur.
  def each_feature(feature_hash)
    pos_field = 2

    feature_hash.each do |label, metafeatures|
      # only context features of a size retained in initialize
      next unless @cxsizes[label]

      metafeatures.each do |metafeature|
        yield "POS" + label + metafeature.split("#")[pos_field]
      end
    end
  end
end
190
-
191
- #####
192
- # bigram/trigram feature
193
# Bigram/trigram feature, chosen in the experiment file as "ngram".
class FredNgramFeatureExtractor < FredFeatureExtractor
  FredNgramFeatureExtractor.announce_me()

  # name by which this feature is chosen in the experiment file
  def self.feature_name
    "ngram"
  end

  ###
  # exp: Fred experiment file object
  #
  # Prints a warning if no configured context size is >= 2.
  def initialize(exp)
    super(exp)

    # @cxsize: the context size from which the ngram features will be
    # computed: the first configured context size that is >= 2,
    # or nil if there is none
    @cxsize = @exp.get_lf("feature", "context").detect { |cxsize|
      cxsize >= 2
    }
    unless @cxsize
      $stderr.puts "Warning: no context of size >= 2, so"
      $stderr.puts "no ngram feature computed."
    end
  end

  ###
  # each_feature
  #
  # feature_hash: hash metafeature_label(string) -> metafeatures(array:string)
  #
  # Yields up to four features computed from the context of size
  # @cxsize: bigram and trigram of lemmas and of POSs, each as a label
  # ("BLEM", "BPOS", "TLEM", "TPOS") followed by the concatenated fields.
  # An ngram is left out if one of its positions is missing.
  #
  # Yields nothing if no usable context size was found in initialize.
  # Without this guard a metafeature labelled just "CX" would have been
  # matched and the index computation below would have failed on nil.
  def each_feature(feature_hash)
    return feature_hash unless @cxsize

    # word#lemma#pos#ne
    lemma_index = 1
    pos_index = 2

    # |fvalues| = 2*cxsize, that is, cxsize describes
    # the length of a one-sided context window
    # the bigram of features around the target
    # concerns fvalues[cxsize-1] and fvalues[cxsize]
    # the trigram of two words before, one word after includes
    # fvalues[cxsize-2], fvalues[cxsize-1] and fvalues[cxsize]
    ngrams = [
      [[-1, 0], "BLEM", lemma_index],     # bigram of lemmas
      [[-1, 0], "BPOS", pos_index],       # bigram of POSs
      [[-2, -1, 0], "TLEM", lemma_index], # trigram of lemmas
      [[-2, -1, 0], "TPOS", pos_index]    # trigram of POSs
    ]

    # label of the one context metafeature we compute ngrams from
    cx_label = "CX" + @cxsize.to_s()

    feature_hash.each { |ftype, fvalues|
      next unless ftype == cx_label

      ngrams.each { |f_indices, label, subindex|
        # @cxsize >= 2, so none of the indices below is negative
        fs = f_indices.map { |i| fvalues[@cxsize + i] }.compact()
        if fs.length() == f_indices.length()
          # we successfully extracted entries for all the given indices
          yield label + fs.map { |f| f.split("#")[subindex] }.join()
        end
      }
    }
  end
end
248
-
249
-
250
- #####
251
- # syntax feature
252
# Syntax feature, chosen in the experiment file as "syntax".
class FredSynFeatureExtractor < FredFeatureExtractor
  FredSynFeatureExtractor.announce_me()

  # name by which this feature is chosen in the experiment file
  def self.feature_name
    "syntax"
  end

  ###
  # each_feature
  #
  # feature_hash: hash metafeature_label(string) -> metafeatures(array:string)
  #
  # For the metafeature labels "CH", "PA" and "SI", yields the label
  # followed by one field of each "#"-separated metafeature:
  # field 0 for "CH"/"PA", field 1 for "SI"
  # (format according to the original comment:
  # parentlemma#grf#word#lemma#pos#ne, so field 1 skips the parent lemma).
  # All other labels are ignored.
  def each_feature(feature_hash)
    feature_hash.each do |label, metafeatures|
      grf_field =
        case label
        when "CH", "PA" then 0
        when "SI" then 1
        end

      # not a syntactic metafeature
      next if grf_field.nil?

      metafeatures.each do |metafeature|
        yield label + metafeature.split("#")[grf_field]
      end
    end
  end
end
286
-
287
-
288
-
289
-
290
- #####
291
- # syntax-plus-headword feature
292
# Syntax-plus-headword feature, chosen in the experiment file as "synsem".
class FredSynsemFeatureExtractor < FredFeatureExtractor
  FredSynsemFeatureExtractor.announce_me()

  # name by which this feature is chosen in the experiment file
  def self.feature_name
    "synsem"
  end

  ###
  # each_feature
  #
  # feature_hash: hash metafeature_label(string) -> metafeatures(array:string)
  #
  # For the metafeature labels "CH" and "PA", yields
  # label + "SEM" + the complete metafeature
  # (format according to the original comment: grf#word#lemma#pos#ne).
  # For "SI" (parentlemma#grf#word#lemma#pos#ne), the first
  # "#"-separated field, the parent lemma, is removed first.
  # All other labels are ignored.
  def each_feature(feature_hash)
    feature_hash.each do |label, metafeatures|
      prefix = label + "SEM"

      if label == "CH" || label == "PA"
        metafeatures.each do |metafeature|
          yield prefix + metafeature
        end
      elsif label == "SI"
        metafeatures.each do |metafeature|
          without_parent = metafeature.split("#")[1..-1]
          yield prefix + without_parent.join("#")
        end
      end
      # anything else is not a syntax feature
    end
  end
end