shalmaneser-fred 1.2.0.rc4 → 1.2.rc5
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +47 -18
- data/bin/fred +8 -3
- data/lib/fred/FredConventions.rb +190 -189
- data/lib/fred/abstract_context_provider.rb +246 -0
- data/lib/fred/abstract_fred_feature_access.rb +43 -0
- data/lib/fred/answer_key_access.rb +130 -0
- data/lib/fred/aux_keep_writers.rb +94 -0
- data/lib/fred/baseline.rb +153 -0
- data/lib/fred/context_provider.rb +55 -0
- data/lib/fred/feature_extractors/fred_context_feature_extractor.rb +48 -0
- data/lib/fred/feature_extractors/fred_context_pos_feature_extractor.rb +48 -0
- data/lib/fred/feature_extractors/fred_feature_extractor.rb +50 -0
- data/lib/fred/feature_extractors/fred_ngram_feature_extractor.rb +65 -0
- data/lib/fred/feature_extractors/fred_syn_feature_extractor.rb +33 -0
- data/lib/fred/feature_extractors/fred_synsem_feature_extractor.rb +32 -0
- data/lib/fred/feature_extractors.rb +5 -0
- data/lib/fred/file_zipped.rb +43 -0
- data/lib/fred/find_all_targets.rb +94 -0
- data/lib/fred/find_targets_from_frames.rb +92 -0
- data/lib/fred/fred.rb +43 -40
- data/lib/fred/fred_error.rb +15 -0
- data/lib/fred/fred_eval.rb +311 -0
- data/lib/fred/fred_feature_access.rb +420 -0
- data/lib/fred/fred_feature_info.rb +56 -0
- data/lib/fred/fred_featurize.rb +525 -0
- data/lib/fred/fred_parameters.rb +190 -0
- data/lib/fred/fred_split.rb +86 -0
- data/lib/fred/fred_split_pkg.rb +189 -0
- data/lib/fred/fred_test.rb +571 -0
- data/lib/fred/fred_train.rb +125 -0
- data/lib/fred/grammatical_function_access.rb +63 -0
- data/lib/fred/md5.rb +6 -0
- data/lib/fred/meta_feature_access.rb +185 -0
- data/lib/fred/non_contiguous_context_provider.rb +532 -0
- data/lib/fred/opt_parser.rb +182 -161
- data/lib/fred/plot_and_r_eval.rb +486 -0
- data/lib/fred/single_sent_context_provider.rb +76 -0
- data/lib/fred/slide_var.rb +148 -0
- data/lib/fred/targets.rb +136 -0
- data/lib/fred/toggle_var.rb +61 -0
- data/lib/fred/word_lemma_pos_ne.rb +51 -0
- data/lib/fred/write_features_binary.rb +95 -0
- data/lib/fred/write_features_nary.rb +51 -0
- data/lib/fred/write_features_nary_or_binary.rb +51 -0
- data/lib/shalmaneser/fred.rb +1 -0
- metadata +57 -30
- data/lib/fred/Baseline.rb +0 -150
- data/lib/fred/FileZipped.rb +0 -31
- data/lib/fred/FredBOWContext.rb +0 -877
- data/lib/fred/FredDetermineTargets.rb +0 -319
- data/lib/fred/FredEval.rb +0 -312
- data/lib/fred/FredFeatureExtractors.rb +0 -322
- data/lib/fred/FredFeatures.rb +0 -1061
- data/lib/fred/FredFeaturize.rb +0 -602
- data/lib/fred/FredNumTrainingSenses.rb +0 -27
- data/lib/fred/FredParameters.rb +0 -402
- data/lib/fred/FredSplit.rb +0 -84
- data/lib/fred/FredSplitPkg.rb +0 -180
- data/lib/fred/FredTest.rb +0 -606
- data/lib/fred/FredTrain.rb +0 -144
- data/lib/fred/PlotAndREval.rb +0 -480
- data/lib/fred/fred_config_data.rb +0 -185
- data/test/frprep/test_opt_parser.rb +0 -94
- data/test/functional/functional_test_helper.rb +0 -58
- data/test/functional/test_fred.rb +0 -47
- data/test/functional/test_frprep.rb +0 -99
- data/test/functional/test_rosy.rb +0 -40
@@ -0,0 +1,148 @@
|
|
1
|
+
module Shalmaneser
  ##########################################
  module Fred
    ################
    # SlideVar:
    # keeps a single sliding variable and iterates over its values.
    # For each value, each_slide_value() writes the matching entry into the
    # experiment file data structure and yields the pair
    # [current value, descriptive text].
    #
    # Initialization with the value of a --slide command line parameter.
    # Valid forms:
    #
    # feature=<f>:<what>:<start>-<end>:<slide>
    #    with f in { context, ngram, syn, grfunc, fe }
    #    what in { weight, dist } (dist only available for context)
    #    start, end, slide floats represented as strings
    #
    # <var>:<start>-<end>:<slide>
    #    with var in { smoothing_lambda, window_size }
    class SlideVar
      attr_reader :var_name

      # @param string [String] value of the --slide parameter,
      #   "" means "no slide variable"
      # @param exp [FredConfigData] experiment file object; queried via
      #   get_lf() and get_type()
      # @raise [RuntimeError] if the argument cannot be parsed, names an
      #   unslidable variable, or has a step size that is not positive
      def initialize(string, exp)
        # keep start and end value and step size for the sliding
        @startval = @endval = @step = @current = 0.0
        # number of decimal places for float slides, nil for integer slides
        @precision = nil

        # setting experiment file values for each step of the sliding:
        # remember lhs and rhs of what needs to be set.
        # rhs contains a string REPLACEME to be replaced by the current value
        @exp_lhs = ""
        @exp_rhs = ""
        @var_name = ""
        @remove_list_variable_regexp = nil # set non-nil if we need unset_list_entry()

        # empty slide variable
        return if string == ""

        if (match = /^feature=(\w+):(\w+):([\d\.]+)-([\d\.]+):([\d\.]+)$/.match(string))
          # --slide feature=ngram:weight:0.8-4.0:0.3
          # --slide feature=context:dist:0.7-0.9:0.05
          init_feature_slide(exp, *match.captures)
        elsif (match = /^(\w+):([\d\.]+)-([\d\.]+):([\d\.]+)$/.match(string))
          # --slide window_size:0-4:1
          # --slide smoothing_lambda:0.3-0.9:0.05
          init_variable_slide(exp, *match.captures)
        else
          # not a valid argument to --slide
          raise "Sorry, could not parse argument of --slide. \nI got: "+ string
        end

        # a step of zero would make each_slide_value() loop forever
        unless @step > 0
          raise "Error in argument of --slide: the step size must be greater than zero, what I got is #{@step}"
        end
      end

      ####
      # iterate through each value of the slide variable (if there is a slide variable)
      # and set it in the experiment file data structure
      #
      # also yield a descriptive text of the current setting
      #
      # @param exp [FredConfigData] experiment file object that is modified
      #   via unset_list_entry() and set_entry()
      # @yield [Array] pair [current value, "<variable name>=<current value>"];
      #   without a slide variable, [0, ""] is yielded exactly once
      def each_slide_value(exp)
        if empty?
          # no slide variable
          yield [0, ""]
          return
        end

        # Each value is derived from the start value instead of being
        # accumulated, so rounding errors cannot pile up and the end value
        # is not missed.
        index = 0
        @current = slide_value_at(index)

        while @current <= @endval
          if @remove_list_variable_regexp
            # we have a list feature that we first need to unset before setting it
            exp.unset_list_entry(@exp_lhs, @remove_list_variable_regexp)
          end
          exp.set_entry(@exp_lhs, @exp_rhs.sub(/REPLACEME/, @current.to_s))

          yield [@current, @var_name + "=" + @current.to_s]

          index += 1
          @current = slide_value_at(index)
        end
      end

      # @return [Boolean] true if no slide variable has been set
      def empty?
        @exp_lhs.empty?
      end

      private

      # Set up sliding for a weighted feature ("feature=..." form).
      def init_feature_slide(exp, featurename, parname, startval, endval, step)
        set_float_range(startval, endval, step)
        @exp_lhs = "feature"

        if featurename == "context"
          # both weight and dist possible
          case parname
          when "weight"
            @exp_rhs = "#{featurename} REPLACEME #{exp.get_lf("feature", "context", "wtdist")}"
          when "dist"
            @exp_rhs = "#{featurename} #{exp.get_lf("feature", "context", "weight")} REPLACEME"
          else
            raise "Error in argument of --slide: I found a value of neither 'weight' nor 'dist': "+ parname
          end

          @exp_rhs << " mwedist" if exp.get_lf("feature", "context", "mwedist")
        else
          # feature name not "context": only weight possible
          unless parname == "weight"
            raise "Error in argument of --slide: can only do 'weight', what I got is "+ parname
          end

          @exp_rhs = "#{featurename} REPLACEME"
        end

        @var_name = "feature #{featurename} #{parname}"
        # regexp literal: in a double-quoted string "\s" would be a plain space
        @remove_list_variable_regexp = /^#{featurename}\s/
      end

      # Set up sliding for a plain integer or float variable ("<var>:..." form).
      def init_variable_slide(exp, varname, startval, endval, step)
        case exp.get_type(varname)
        when "integer"
          @startval = startval.to_i
          @endval = endval.to_i
          @step = step.to_i
        when "float"
          set_float_range(startval, endval, step)
        else
          raise "Unslidable variable "+ varname
        end

        @exp_lhs = varname
        @exp_rhs = "REPLACEME"
        @var_name = varname
      end

      # Store a float range and remember how many decimal places the
      # start value and step were written with (at least one, so that
      # rounded values stay floats).
      def set_float_range(startval, endval, step)
        @startval = startval.to_f
        @endval = endval.to_f
        @step = step.to_f

        digits = [startval, step].map { |numeral| numeral[/\.(\d*)/, 1].to_s.length }.max
        @precision = [digits, 1].max
      end

      # Value of the slide variable at the given zero-based position.
      def slide_value_at(index)
        value = @startval + index * @step
        @precision ? value.round(@precision) : value
      end
    end
  end
end
|
data/lib/fred/targets.rb
ADDED
@@ -0,0 +1,136 @@
|
|
1
|
+
require "fred/file_zipped"
|
2
|
+
require 'fred/FredConventions' # !
|
3
|
+
|
4
|
+
module Shalmaneser
  module Fred
    # Base class for target determination: keeps a record of the
    # lemma/POS combinations seen and their senses, and persists that
    # record as "targets.txt.gz" in the classifier directory.
    # Subclasses are expected to override determine_targets().
    class Targets
      # false if mode was "a" or "r" but the targets file could not be opened
      attr_reader :targets_okay

      ###
      # @param exp experiment file object
      # @param interpreter_class SynInterpreter class, or nil
      # @param mode [String] "r", "w", "a", as in files
      def initialize(exp, interpreter_class, mode)
        @exp = exp
        @interpreter_class = interpreter_class

        # keep recorded targets here: lemmapos string -> list of senses
        @targets = {}

        # write target info in the classifier directory.
        # This is _not_ dependent on a potential split ID
        @dir = File.new_dir(::Shalmaneser::Fred.fred_classifier_directory(@exp), "targets")

        @targets_okay = true

        case mode
        when "w"
          # start from scratch, no list of targets
        when "a", "r"
          # read existing file containing targets
          unless read_targets_file
            # no targets file present: signal this.
            # As before, @record_targets stays unset in this case.
            @targets_okay = false
            return
          end
        else
          $stderr.puts "Error: shouldn't be here."
          exit 1
        end

        @record_targets = ["w", "a"].include?(mode)
      end

      ###
      # determine_targets:
      # for a given SalsaTigerSentence,
      # determine all targets,
      # each as a _single_ main terminal node
      #
      # We need a single terminal node in order
      # to compute the context window
      #
      # returns:
      # hash: target_IDs -> list of senses
      #   where target_IDs is a pair [list of terminal IDs, main terminal ID]
      #
      #   where a sense is represented as a hash:
      #   "sense": sense, a string
      #   "obj":   FrameNode object
      #   "all_targets": list of node IDs, may comprise more than a single node
      #   "lex":   lemma, or multiword expression in canonical form
      #   "sid": sentence ID
      #
      # This base implementation always raises; it must be overridden.
      def determine_targets(sent)
        raise "overwrite me"
      end

      ##
      # returns a list of lemma-pos combined strings
      def get_lemmas
        @targets.keys
      end

      ##
      # access to lemmas and POS, returns a list of pairs [lemma, pos] (string*string)
      def get_lemma_pos
        @targets.keys.map { |lemmapos| ::Shalmaneser::Fred.fred_lemmapos_separate(lemmapos) }
      end

      ##
      # access to senses
      #
      # @param lemmapos [String] result of fred_lemmapos_combine
      # @return [Array] recorded senses, empty list for an unknown lemmapos
      def get_senses(lemmapos)
        @targets[lemmapos] || []
      end

      ##
      # write the recorded targets to the targets file.
      # Prints an error and exits if the file cannot be opened for writing.
      def done_reading_targets
        begin
          file = FileZipped.new(@dir + "targets.txt.gz", "w")
        rescue
          $stderr.puts "Error: Could not write file #{@dir}targets.txt.gz"
          exit 1
        end

        begin
          @targets.each_pair { |lemma, senses|
            file.puts "LEMMA #{lemma} SENSES "+ senses.join(" ")
          }
        ensure
          # release the handle even if writing fails half-way
          file.close
        end
      end

      ###############################
      protected

      ##
      # record: record occurrence of a lemma/sense pair
      # in the <@targets> data structure
      #
      # @param target_info [Hash] uses the keys "lex", "pos" and "sense"
      def record(target_info)
        lemmapos = ::Shalmaneser::Fred.fred_lemmapos_combine(target_info["lex"], target_info["pos"])
        senses = (@targets[lemmapos] ||= [])
        senses << target_info["sense"] unless senses.include? target_info["sense"]
      end

      private

      ##
      # Fill @targets from the existing targets file.
      # Returns false if the file could not be opened, true otherwise.
      # The handle is closed once reading is done.
      def read_targets_file
        begin
          file = FileZipped.new(@dir + "targets.txt.gz")
        rescue
          return false
        end

        begin
          file.each { |line|
            match = /^LEMMA (.+) SENSES (.+)$/.match(line.chomp)
            next unless match

            lemmapos = match[1].gsub(/ /, '_')
            #lemmapos.gsub!(/\.[A-Z]\./, '.')
            @targets[lemmapos] = match[2].split
          }
        ensure
          file.close
        end

        true
      end
    end
  end
end
|
@@ -0,0 +1,61 @@
|
|
1
|
+
module Shalmaneser
  module Fred
    ################
    # ToggleVar:
    # keeps a single toggle variable,
    # and has a method that sets this toggle variable to a given value
    # in the experiment file data structure.
    class ToggleVar
      attr_reader :var_name

      # @param string [String] part of the value of the --toggle parameter,
      #   which has been split at ":". Either "feature_dim=<dim>" with dim in
      #   { word, lemma, pos, ne }, or the name of a boolean variable.
      # @param exp [FredConfigData] experiment file object, queried via get_type()
      # @raise [RuntimeError] for an unknown feature dimension, a variable
      #   that cannot be toggled, or a variable that is not boolean
      def initialize(string, exp)
        if (match = /^feature_dim=(\w+)$/.match(string))
          # feature dimension
          @exp_lhs = "feature_dim"
          @exp_rhs = match[1]
          @unset_at_false = true # for false, un-set list valued parameter in set_value_to()
          @var_name = "feature_dim #{@exp_rhs}"

          unless ["word", "lemma", "pos", "ne"].include? @exp_rhs
            raise "Unknown feature dimension "+ @exp_rhs
          end
        else
          # normal variable

          # Checked before the type test: window_size is not boolean, so
          # testing the type first would hide this more specific message.
          if ["use_fn_gf", "window_size"].include? string
            raise "Sorry, cannot toggle #{string}, since this variable takes its effect during featurization."
          end

          unless exp.get_type(string) == "bool"
            raise "Unknown value in --toggle: "+ string
          end

          @exp_lhs = string
          @exp_rhs = "REPLACEME"
          @unset_at_false = false # for false, set parameter to false in set_value_to
          @var_name = @exp_lhs
        end
      end

      ###
      # set the value of my toggle variable to the given boolean
      # in the given experiment file data structure.
      #
      # @param boolean true or false
      # @param exp [FredConfigData] experiment file object, modified via
      #   set_entry() or unset_list_entry()
      # @return [String] a descriptive text of the current setting
      def set_value_to(boolean, exp)
        if @unset_at_false && !boolean
          exp.unset_list_entry(@exp_lhs, @exp_rhs)
        else
          exp.set_entry(@exp_lhs, @exp_rhs.sub(/REPLACEME/, boolean.to_s))
        end

        @var_name + "=" + boolean.to_s
      end
    end
  end
end
|
@@ -0,0 +1,51 @@
|
|
1
|
+
require 'salsa_tiger_xml/salsa_tiger_xml_helper'
|
2
|
+
|
3
|
+
########################################
|
4
|
+
# given a SynNode object representing a terminal,
|
5
|
+
# return:
|
6
|
+
# - the word
|
7
|
+
# - the lemma
|
8
|
+
# - the part of speech
|
9
|
+
# - the named entity (if any)
|
10
|
+
#
|
11
|
+
# as a tuple
|
12
|
+
#
|
13
|
+
# WARNING: word and lemma are turned to lowercase
|
14
|
+
module Shalmaneser
  module Fred
    # Mixin providing word_lemma_pos_ne().
    module WordLemmaPosNe
      # Describe a terminal node by word, lemma, part of speech and named entity.
      #
      # Word and lemma are returned in lowercase. Lowercasing works on
      # copies, so the strings handed out by the node and the interpreter
      # are left untouched (and may be frozen).
      #
      # @param syn_obj [SynNode] node to describe; must be a terminal
      # @param i [SynInterpreter] interpreter used for lemma_backoff()
      # @return [Array] tuple [word, lemma, pos, ne]; all four are nil if
      #   syn_obj is not a terminal (a warning is printed to stderr).
      #   lemma is nil if the interpreter reports "<unknown>";
      #   ne is the node's "ne" attribute, or else its "headof_ne" attribute.
      def word_lemma_pos_ne(syn_obj, i)
        unless syn_obj.is_terminal?
          $stderr.puts "Featurization warning: unexpectedly received non-terminal"
          return [nil, nil, nil, nil]
        end

        word = syn_obj.word
        word = word.downcase if word

        lemma = i.lemma_backoff(syn_obj)
        lemma = nil if lemma && STXML::SalsaTigerXMLHelper.unescape(lemma) == "<unknown>"
        lemma = lemma.downcase if lemma

        pos = syn_obj.part_of_speech

        ne = syn_obj.get_attribute("ne") || syn_obj.get_attribute("headof_ne")

        [word, lemma, pos, ne]
      end
    end
  end
end
|
@@ -0,0 +1,95 @@
|
|
1
|
+
###
|
2
|
+
# Features for binary classifiers
|
3
|
+
require 'fred/FredConventions' # !
|
4
|
+
|
5
|
+
module Shalmaneser
  module Fred
    # Feature writer for binary classifiers: keeps one feature file per
    # sense of a lemma. Every instance is written to every sense file,
    # labelled with that file's sense if the instance has the sense and
    # with the negative label otherwise.
    class WriteFeaturesBinary
      # @param lemma lemma the features are written for
      # @param exp experiment file object; its "negsense" entry is the
      #   label for negative instances ("NONE" if it is not set)
      # @param dataset not used by this writer
      # @param feature_dir [String] directory prefix the file names are appended to
      def initialize(lemma,
                     exp,
                     dataset,
                     feature_dir)
        @lemma = lemma
        @feature_dir = feature_dir

        @negsense = exp.get("negsense") || "NONE"

        # files: sense -> open File object
        @files = {}

        # keep all instances such that, when a new sense comes around,
        # we can write them for that sense
        @instances = []
      end

      # Write one instance to the feature files of all senses seen so far.
      #
      # @param features [Array] feature values of the instance
      # @param senses [Array] senses assigned to the instance
      def write_instance(features, senses)
        # sense we haven't seen before? Then we need to
        # write the whole featurization file for that new sense
        check_for_presence_of_senses(senses)

        # write this new instance for all senses
        @files.each_key { |sense_of_file|
          write_to_sensefile(features, senses, sense_of_file)
        }

        # store instance in case another sense crops up later
        @instances << [features, senses]
      end

      ###
      # close all sense files
      def close
        @files.each_value { |f| f.close }
      end

      ######
      private

      # Open a feature file for each sense that has none yet, and write
      # all instances seen so far to it.
      def check_for_presence_of_senses(senses)
        senses.each { |sense|
          # do we have a sense file for this sense?
          next if @files[sense]

          # open new file for this sense
          @files[sense] = File.new(@feature_dir + ::Shalmaneser::Fred.fred_feature_filename(@lemma, sense, true), "w")

          # and re-write all previous instances for it
          @instances.each { |prev_features, prev_senses|
            write_to_sensefile(prev_features, prev_senses, sense)
          }
        }
      end

      ###
      # Write one comma-separated line to the file of sense_of_file:
      # the features, followed by the binarized target class.
      def write_to_sensefile(features, senses,
                             sense_of_file)
        # file to write to
        f = @files[sense_of_file]

        # print features
        f.print features.map { |x| comma_safe(x) }.join(",")
        f.print ","

        # binarize target class
        label = senses.include?(sense_of_file) ? sense_of_file : @negsense

        # The label is escaped like the features (and like the n-ary
        # writer does), so a comma in it cannot add a column to the line.
        f.puts comma_safe(label)
      end

      # String form of a value with commas replaced, since the comma is
      # the column separator.
      def comma_safe(value)
        value.to_s.gsub(/,/, "COMMA")
      end
    end
  end
end
|
@@ -0,0 +1,51 @@
|
|
1
|
+
require 'fred/FredConventions' # !
|
2
|
+
module Shalmaneser
  module Fred
    ###
    # Features for N-ary classifiers:
    # all instances of a lemma go into one feature file,
    # one comma-separated line per instance, with the sense label(s) last.
    class WriteFeaturesNary
      # @param lemma lemma whose feature file is opened for writing
      # @param exp experiment file object; its "handle_multilabel" entry
      #   decides how several senses per instance are written
      # @param dataset not used by this writer
      # @param feature_dir [String] directory prefix the file name is appended to
      def initialize(lemma,
                     exp,
                     dataset,
                     feature_dir)
        @filename = feature_dir + ::Shalmaneser::Fred.fred_feature_filename(lemma)
        @f = File.new(@filename, "w")
        @handle_multilabel = exp.get("handle_multilabel")
      end

      # Write one instance as a single line.
      #
      # With handle_multilabel "keep", all senses are written: a semicolon
      # separates them from the features, commas separate them from each
      # other. Otherwise only the first sense is written, as one more
      # comma-separated column.
      #
      # @param features [Array] feature values of the instance
      # @param senses [Array] senses assigned to the instance
      def write_instance(features, senses)
        clean = lambda { |value| value.to_s.gsub(/,/, "COMMA").gsub(/;/, "SEMICOLON") }

        @f.print features.map(&clean).join(",")

        unless @handle_multilabel == "keep"
          # one sense: just separate by comma
          @f.print ","
          return @f.puts(clean.call(senses.first))
        end

        # possibly more than one sense:
        # separate by semicolon,
        # and hope that the classifier knows this
        @f.print ";"
        @f.puts senses.map(&clean).join(",")
      end

      # close the feature file
      def close
        @f.close
      end
    end
  end
end
|
@@ -0,0 +1,51 @@
|
|
1
|
+
require "delegate"
|
2
|
+
require 'fred/FredConventions'
|
3
|
+
require 'fred/write_features_binary'
|
4
|
+
require 'fred/write_features_nary'
|
5
|
+
|
6
|
+
module Shalmaneser
  module Fred
    ########
    # class writing features,
    # either lemma-wise (n-ary classifiers)
    # or lemma+sense-wise (binary classifiers):
    # delegates to either a binary or an n-ary writer
    class WriteFeaturesNaryOrBinary < SimpleDelegator
      ###
      # Pick the writer according to the experiment file entry
      # "binary_classifiers" and delegate all writing to it.
      # The feature directory is requested in mode "new".
      def initialize(lemma,
                     exp,
                     dataset)
        feature_dir = WriteFeaturesNaryOrBinary.feature_dir(exp, dataset, "new")

        # binary classifiers if the entry is set, n-ary classifiers otherwise
        writer_class = exp.get("binary_classifiers") ? WriteFeaturesBinary : WriteFeaturesNary

        @writer = writer_class.new(lemma, exp, dataset, feature_dir)
        super(@writer)
      end

      # Feature directory for the given experiment and dataset, as
      # determined by fred_dirname() for the "features" subdirectory.
      def self.feature_dir(exp, dataset, mode = "existing")
        ::Shalmaneser::Fred.fred_dirname(exp, dataset, "features", mode)
      end

      ###
      # Delete all feature files in the feature directory, i.e. every
      # file matching the feature file name pattern for lemma "*".
      def self.remove_files(exp, dataset)
        pattern = WriteFeaturesNaryOrBinary.feature_dir(exp, dataset, "new") +
                  ::Shalmaneser::Fred.fred_feature_filename("*")

        Dir[pattern].each { |path| File.delete(path) if File.exist?(path) }
      end
    end
  end
end
|
@@ -0,0 +1 @@
|
|
1
|
+
# A dummy file to require for now.
|