shalmaneser-fred 1.2.0.rc4 → 1.2.rc5
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +47 -18
- data/bin/fred +8 -3
- data/lib/fred/FredConventions.rb +190 -189
- data/lib/fred/abstract_context_provider.rb +246 -0
- data/lib/fred/abstract_fred_feature_access.rb +43 -0
- data/lib/fred/answer_key_access.rb +130 -0
- data/lib/fred/aux_keep_writers.rb +94 -0
- data/lib/fred/baseline.rb +153 -0
- data/lib/fred/context_provider.rb +55 -0
- data/lib/fred/feature_extractors/fred_context_feature_extractor.rb +48 -0
- data/lib/fred/feature_extractors/fred_context_pos_feature_extractor.rb +48 -0
- data/lib/fred/feature_extractors/fred_feature_extractor.rb +50 -0
- data/lib/fred/feature_extractors/fred_ngram_feature_extractor.rb +65 -0
- data/lib/fred/feature_extractors/fred_syn_feature_extractor.rb +33 -0
- data/lib/fred/feature_extractors/fred_synsem_feature_extractor.rb +32 -0
- data/lib/fred/feature_extractors.rb +5 -0
- data/lib/fred/file_zipped.rb +43 -0
- data/lib/fred/find_all_targets.rb +94 -0
- data/lib/fred/find_targets_from_frames.rb +92 -0
- data/lib/fred/fred.rb +43 -40
- data/lib/fred/fred_error.rb +15 -0
- data/lib/fred/fred_eval.rb +311 -0
- data/lib/fred/fred_feature_access.rb +420 -0
- data/lib/fred/fred_feature_info.rb +56 -0
- data/lib/fred/fred_featurize.rb +525 -0
- data/lib/fred/fred_parameters.rb +190 -0
- data/lib/fred/fred_split.rb +86 -0
- data/lib/fred/fred_split_pkg.rb +189 -0
- data/lib/fred/fred_test.rb +571 -0
- data/lib/fred/fred_train.rb +125 -0
- data/lib/fred/grammatical_function_access.rb +63 -0
- data/lib/fred/md5.rb +6 -0
- data/lib/fred/meta_feature_access.rb +185 -0
- data/lib/fred/non_contiguous_context_provider.rb +532 -0
- data/lib/fred/opt_parser.rb +182 -161
- data/lib/fred/plot_and_r_eval.rb +486 -0
- data/lib/fred/single_sent_context_provider.rb +76 -0
- data/lib/fred/slide_var.rb +148 -0
- data/lib/fred/targets.rb +136 -0
- data/lib/fred/toggle_var.rb +61 -0
- data/lib/fred/word_lemma_pos_ne.rb +51 -0
- data/lib/fred/write_features_binary.rb +95 -0
- data/lib/fred/write_features_nary.rb +51 -0
- data/lib/fred/write_features_nary_or_binary.rb +51 -0
- data/lib/shalmaneser/fred.rb +1 -0
- metadata +57 -30
- data/lib/fred/Baseline.rb +0 -150
- data/lib/fred/FileZipped.rb +0 -31
- data/lib/fred/FredBOWContext.rb +0 -877
- data/lib/fred/FredDetermineTargets.rb +0 -319
- data/lib/fred/FredEval.rb +0 -312
- data/lib/fred/FredFeatureExtractors.rb +0 -322
- data/lib/fred/FredFeatures.rb +0 -1061
- data/lib/fred/FredFeaturize.rb +0 -602
- data/lib/fred/FredNumTrainingSenses.rb +0 -27
- data/lib/fred/FredParameters.rb +0 -402
- data/lib/fred/FredSplit.rb +0 -84
- data/lib/fred/FredSplitPkg.rb +0 -180
- data/lib/fred/FredTest.rb +0 -606
- data/lib/fred/FredTrain.rb +0 -144
- data/lib/fred/PlotAndREval.rb +0 -480
- data/lib/fred/fred_config_data.rb +0 -185
- data/test/frprep/test_opt_parser.rb +0 -94
- data/test/functional/functional_test_helper.rb +0 -58
- data/test/functional/test_fred.rb +0 -47
- data/test/functional/test_frprep.rb +0 -99
- data/test/functional/test_rosy.rb +0 -40
metadata
CHANGED
@@ -1,15 +1,29 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: shalmaneser-fred
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.2.0.rc4
|
4
|
+
version: 1.2.rc5
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Andrei Beliankou
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2016-01-13 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: shalmaneser-lib
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - '='
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: 1.2.rc5
|
20
|
+
type: :runtime
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - '='
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: 1.2.rc5
|
13
27
|
- !ruby/object:Gem::Dependency
|
14
28
|
name: mysql
|
15
29
|
requirement: !ruby/object:Gem::Requirement
|
@@ -39,31 +53,49 @@ files:
|
|
39
53
|
- LICENSE.md
|
40
54
|
- README.md
|
41
55
|
- bin/fred
|
42
|
-
- lib/fred/Baseline.rb
|
43
|
-
- lib/fred/FileZipped.rb
|
44
|
-
- lib/fred/FredBOWContext.rb
|
45
56
|
- lib/fred/FredConventions.rb
|
46
|
-
- lib/fred/FredDetermineTargets.rb
|
47
|
-
- lib/fred/FredEval.rb
|
48
|
-
- lib/fred/FredFeatureExtractors.rb
|
49
|
-
- lib/fred/FredFeatures.rb
|
50
|
-
- lib/fred/FredFeaturize.rb
|
51
|
-
- lib/fred/FredNumTrainingSenses.rb
|
52
|
-
- lib/fred/FredParameters.rb
|
53
|
-
- lib/fred/FredSplit.rb
|
54
|
-
- lib/fred/FredSplitPkg.rb
|
55
|
-
- lib/fred/FredTest.rb
|
56
|
-
- lib/fred/FredTrain.rb
|
57
|
-
- lib/fred/PlotAndREval.rb
|
57
|
+
- lib/fred/abstract_context_provider.rb
|
58
|
+
- lib/fred/abstract_fred_feature_access.rb
|
59
|
+
- lib/fred/answer_key_access.rb
|
60
|
+
- lib/fred/aux_keep_writers.rb
|
61
|
+
- lib/fred/baseline.rb
|
62
|
+
- lib/fred/context_provider.rb
|
63
|
+
- lib/fred/feature_extractors.rb
|
64
|
+
- lib/fred/feature_extractors/fred_context_feature_extractor.rb
|
65
|
+
- lib/fred/feature_extractors/fred_context_pos_feature_extractor.rb
|
66
|
+
- lib/fred/feature_extractors/fred_feature_extractor.rb
|
67
|
+
- lib/fred/feature_extractors/fred_ngram_feature_extractor.rb
|
68
|
+
- lib/fred/feature_extractors/fred_syn_feature_extractor.rb
|
69
|
+
- lib/fred/feature_extractors/fred_synsem_feature_extractor.rb
|
70
|
+
- lib/fred/file_zipped.rb
|
71
|
+
- lib/fred/find_all_targets.rb
|
72
|
+
- lib/fred/find_targets_from_frames.rb
|
58
73
|
- lib/fred/fred.rb
|
59
|
-
- lib/fred/fred_config_data.rb
|
74
|
+
- lib/fred/fred_error.rb
|
75
|
+
- lib/fred/fred_eval.rb
|
76
|
+
- lib/fred/fred_feature_access.rb
|
77
|
+
- lib/fred/fred_feature_info.rb
|
78
|
+
- lib/fred/fred_featurize.rb
|
79
|
+
- lib/fred/fred_parameters.rb
|
80
|
+
- lib/fred/fred_split.rb
|
81
|
+
- lib/fred/fred_split_pkg.rb
|
82
|
+
- lib/fred/fred_test.rb
|
83
|
+
- lib/fred/fred_train.rb
|
84
|
+
- lib/fred/grammatical_function_access.rb
|
60
85
|
- lib/fred/md5.rb
|
86
|
+
- lib/fred/meta_feature_access.rb
|
87
|
+
- lib/fred/non_contiguous_context_provider.rb
|
61
88
|
- lib/fred/opt_parser.rb
|
62
|
-
- test/frprep/test_opt_parser.rb
|
63
|
-
- test/functional/functional_test_helper.rb
|
64
|
-
- test/functional/test_fred.rb
|
65
|
-
- test/functional/test_frprep.rb
|
66
|
-
- test/functional/test_rosy.rb
|
89
|
+
- lib/fred/plot_and_r_eval.rb
|
90
|
+
- lib/fred/single_sent_context_provider.rb
|
91
|
+
- lib/fred/slide_var.rb
|
92
|
+
- lib/fred/targets.rb
|
93
|
+
- lib/fred/toggle_var.rb
|
94
|
+
- lib/fred/word_lemma_pos_ne.rb
|
95
|
+
- lib/fred/write_features_binary.rb
|
96
|
+
- lib/fred/write_features_nary.rb
|
97
|
+
- lib/fred/write_features_nary_or_binary.rb
|
98
|
+
- lib/shalmaneser/fred.rb
|
67
99
|
homepage: https://github.com/arbox/shalmaneser
|
68
100
|
licenses:
|
69
101
|
- GPL-2.0
|
@@ -86,14 +118,9 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
86
118
|
version: 1.3.1
|
87
119
|
requirements: []
|
88
120
|
rubyforge_project:
|
89
|
-
rubygems_version: 2.
|
121
|
+
rubygems_version: 2.5.1
|
90
122
|
signing_key:
|
91
123
|
specification_version: 4
|
92
124
|
summary: FRED
|
93
|
-
test_files:
|
94
|
-
- test/frprep/test_opt_parser.rb
|
95
|
-
- test/functional/functional_test_helper.rb
|
96
|
-
- test/functional/test_fred.rb
|
97
|
-
- test/functional/test_frprep.rb
|
98
|
-
- test/functional/test_rosy.rb
|
125
|
+
test_files: []
|
99
126
|
has_rdoc:
|
data/lib/fred/Baseline.rb
DELETED
@@ -1,150 +0,0 @@
|
|
1
|
-
# Baseline
|
2
|
-
# Katrin Erk April 05
|
3
|
-
#
|
4
|
-
# baseline for WSD:
|
5
|
-
# always assign most frequent sense
|
6
|
-
# The baseline doesn't do binary classifiers.
|
7
|
-
|
8
|
-
require "fred/FredConventions"
|
9
|
-
require "fred/FredSplitPkg"
|
10
|
-
require "fred/FredFeatures"
|
11
|
-
require "fred/FredDetermineTargets"
|
12
|
-
|
13
|
-
class Baseline
|
14
|
-
###
|
15
|
-
# new
|
16
|
-
#
|
17
|
-
# get splitlog dir (if any) along with everything else
|
18
|
-
# because we are only evaluating the training data
|
19
|
-
# at test time
|
20
|
-
#
|
21
|
-
def initialize(exp, # FredConfigData object
|
22
|
-
split_id = nil) # string: split ID
|
23
|
-
@exp = exp
|
24
|
-
@split_id = split_id
|
25
|
-
|
26
|
-
# for each lemma: remember prevalent sense
|
27
|
-
@lemma_to_sense = Hash.new()
|
28
|
-
|
29
|
-
if @split_id
|
30
|
-
split_obj = FredSplitPkg.new(@exp)
|
31
|
-
end
|
32
|
-
|
33
|
-
lemma_done = Hash.new()
|
34
|
-
|
35
|
-
# iterate through lemmas
|
36
|
-
@target_obj = Targets.new(@exp, nil, "r")
|
37
|
-
unless @target_obj.targets_okay
|
38
|
-
# error during initialization
|
39
|
-
$stderr.puts "Error: Could not read list of known targets, bailing out."
|
40
|
-
exit 1
|
41
|
-
end
|
42
|
-
|
43
|
-
@target_obj.get_lemmas().each { |lemmapos|
|
44
|
-
|
45
|
-
if @split_id
|
46
|
-
# read training split of answer keys
|
47
|
-
answer_obj = AnswerKeyAccess.new(@exp, "train", lemmapos, "r", @split_id, "train")
|
48
|
-
else
|
49
|
-
# read full answer key file of training data
|
50
|
-
answer_obj = AnswerKeyAccess.new(@exp, "train", lemmapos, "r")
|
51
|
-
end
|
52
|
-
|
53
|
-
count_senses = Hash.new(0)
|
54
|
-
|
55
|
-
answer_obj.each { |lemma, pos, ids, sid, senses_all, senses_this|
|
56
|
-
# senses_this may include more than one sense for multi-label assignment
|
57
|
-
senses_this.each { |sense|
|
58
|
-
count_senses[sense] += 1
|
59
|
-
}
|
60
|
-
}
|
61
|
-
|
62
|
-
@lemma_to_sense[lemmapos] = count_senses.keys().max { |a, b|
|
63
|
-
count_senses[a] <=> count_senses[b]
|
64
|
-
}
|
65
|
-
}
|
66
|
-
|
67
|
-
|
68
|
-
@lemma = nil
|
69
|
-
end
|
70
|
-
|
71
|
-
###
|
72
|
-
def train(infilename)
|
73
|
-
# no training here
|
74
|
-
end
|
75
|
-
|
76
|
-
###
|
77
|
-
def write(classifier_file)
|
78
|
-
# no classifiers to write
|
79
|
-
end
|
80
|
-
|
81
|
-
def exists?(classifier_file)
|
82
|
-
return true
|
83
|
-
end
|
84
|
-
|
85
|
-
def read(classifier_file)
|
86
|
-
values = deconstruct_fred_classifier_filename(File.basename(classifier_file))
|
87
|
-
@lemma = values["lemma"]
|
88
|
-
if @lemma
|
89
|
-
return true
|
90
|
-
else
|
91
|
-
$stderr.puts "Warning: couldn't determine lemma name in #{classifier_file}, skipping"
|
92
|
-
return false
|
93
|
-
end
|
94
|
-
end
|
95
|
-
|
96
|
-
|
97
|
-
def read_resultfile(filename)
|
98
|
-
retv = Array.new()
|
99
|
-
begin
|
100
|
-
f = File.new(filename)
|
101
|
-
rescue
|
102
|
-
raise "Could not read baseline result file #{filename}"
|
103
|
-
end
|
104
|
-
|
105
|
-
f.each { |line|
|
106
|
-
retv << [[ line.chomp(), 1.0 ]]
|
107
|
-
}
|
108
|
-
|
109
|
-
return retv
|
110
|
-
end
|
111
|
-
|
112
|
-
def apply(infilename, outfilename)
|
113
|
-
# open input and output file
|
114
|
-
begin
|
115
|
-
out_f = File.new(outfilename, "w")
|
116
|
-
rescue
|
117
|
-
$stderr.puts "Error: cannot write to classification output file #{outfilename}."
|
118
|
-
exit 1
|
119
|
-
end
|
120
|
-
begin
|
121
|
-
f = File.new(infilename)
|
122
|
-
rescue
|
123
|
-
$stderr.puts "Error: cannot read feature file #{infilename}."
|
124
|
-
exit 1
|
125
|
-
end
|
126
|
-
|
127
|
-
# deconstruct input filename to determine lemma
|
128
|
-
unless @lemma
|
129
|
-
# something went wrong in read()
|
130
|
-
return false
|
131
|
-
end
|
132
|
-
|
133
|
-
# do we have a sense for this?
|
134
|
-
unless (sense = @lemma_to_sense[@lemma])
|
135
|
-
# nope: assign "NONE" (or whatever the null label is here)
|
136
|
-
sense = @exp.get("negsense")
|
137
|
-
unless sense
|
138
|
-
sense = "NONE"
|
139
|
-
end
|
140
|
-
end
|
141
|
-
|
142
|
-
f.each { |line|
|
143
|
-
out_f.puts sense
|
144
|
-
}
|
145
|
-
out_f.close()
|
146
|
-
f.close()
|
147
|
-
|
148
|
-
return true
|
149
|
-
end
|
150
|
-
end
|
data/lib/fred/FileZipped.rb
DELETED
@@ -1,31 +0,0 @@
|
|
1
|
-
class FileZipped
|
2
|
-
|
3
|
-
def FileZipped.new(filename,
|
4
|
-
mode = "r")
|
5
|
-
|
6
|
-
# escape characters in the filename that
|
7
|
-
# would make the shell hiccup on the command
|
8
|
-
filename = filename.gsub(/([();:!?'`])/, 'XXSLASHXX\1')
|
9
|
-
filename = filename.gsub(/XXSLASHXX/, "\\")
|
10
|
-
|
11
|
-
begin
|
12
|
-
case mode
|
13
|
-
when "r"
|
14
|
-
unless File.exists? filename
|
15
|
-
raise "catchme"
|
16
|
-
end
|
17
|
-
return IO.popen("gunzip -c #{filename}")
|
18
|
-
when "w"
|
19
|
-
return IO.popen("gzip > #{filename}", "w")
|
20
|
-
when "a"
|
21
|
-
return IO.popen("gzip >> #{filename}", "w")
|
22
|
-
else
|
23
|
-
$stderr.puts "FileZipped error: only modes r, w, a are implemented. I got: #{mode}."
|
24
|
-
exit 1
|
25
|
-
end
|
26
|
-
rescue
|
27
|
-
raise "Error opening file #{filename}."
|
28
|
-
end
|
29
|
-
end
|
30
|
-
|
31
|
-
end
|