shalmaneser-fred 1.2.0.rc4 → 1.2.rc5

Sign up to get free protection for your applications and to get access to all the features.
Files changed (68) hide show
  1. checksums.yaml +4 -4
  2. data/README.md +47 -18
  3. data/bin/fred +8 -3
  4. data/lib/fred/FredConventions.rb +190 -189
  5. data/lib/fred/abstract_context_provider.rb +246 -0
  6. data/lib/fred/abstract_fred_feature_access.rb +43 -0
  7. data/lib/fred/answer_key_access.rb +130 -0
  8. data/lib/fred/aux_keep_writers.rb +94 -0
  9. data/lib/fred/baseline.rb +153 -0
  10. data/lib/fred/context_provider.rb +55 -0
  11. data/lib/fred/feature_extractors/fred_context_feature_extractor.rb +48 -0
  12. data/lib/fred/feature_extractors/fred_context_pos_feature_extractor.rb +48 -0
  13. data/lib/fred/feature_extractors/fred_feature_extractor.rb +50 -0
  14. data/lib/fred/feature_extractors/fred_ngram_feature_extractor.rb +65 -0
  15. data/lib/fred/feature_extractors/fred_syn_feature_extractor.rb +33 -0
  16. data/lib/fred/feature_extractors/fred_synsem_feature_extractor.rb +32 -0
  17. data/lib/fred/feature_extractors.rb +5 -0
  18. data/lib/fred/file_zipped.rb +43 -0
  19. data/lib/fred/find_all_targets.rb +94 -0
  20. data/lib/fred/find_targets_from_frames.rb +92 -0
  21. data/lib/fred/fred.rb +43 -40
  22. data/lib/fred/fred_error.rb +15 -0
  23. data/lib/fred/fred_eval.rb +311 -0
  24. data/lib/fred/fred_feature_access.rb +420 -0
  25. data/lib/fred/fred_feature_info.rb +56 -0
  26. data/lib/fred/fred_featurize.rb +525 -0
  27. data/lib/fred/fred_parameters.rb +190 -0
  28. data/lib/fred/fred_split.rb +86 -0
  29. data/lib/fred/fred_split_pkg.rb +189 -0
  30. data/lib/fred/fred_test.rb +571 -0
  31. data/lib/fred/fred_train.rb +125 -0
  32. data/lib/fred/grammatical_function_access.rb +63 -0
  33. data/lib/fred/md5.rb +6 -0
  34. data/lib/fred/meta_feature_access.rb +185 -0
  35. data/lib/fred/non_contiguous_context_provider.rb +532 -0
  36. data/lib/fred/opt_parser.rb +182 -161
  37. data/lib/fred/plot_and_r_eval.rb +486 -0
  38. data/lib/fred/single_sent_context_provider.rb +76 -0
  39. data/lib/fred/slide_var.rb +148 -0
  40. data/lib/fred/targets.rb +136 -0
  41. data/lib/fred/toggle_var.rb +61 -0
  42. data/lib/fred/word_lemma_pos_ne.rb +51 -0
  43. data/lib/fred/write_features_binary.rb +95 -0
  44. data/lib/fred/write_features_nary.rb +51 -0
  45. data/lib/fred/write_features_nary_or_binary.rb +51 -0
  46. data/lib/shalmaneser/fred.rb +1 -0
  47. metadata +57 -30
  48. data/lib/fred/Baseline.rb +0 -150
  49. data/lib/fred/FileZipped.rb +0 -31
  50. data/lib/fred/FredBOWContext.rb +0 -877
  51. data/lib/fred/FredDetermineTargets.rb +0 -319
  52. data/lib/fred/FredEval.rb +0 -312
  53. data/lib/fred/FredFeatureExtractors.rb +0 -322
  54. data/lib/fred/FredFeatures.rb +0 -1061
  55. data/lib/fred/FredFeaturize.rb +0 -602
  56. data/lib/fred/FredNumTrainingSenses.rb +0 -27
  57. data/lib/fred/FredParameters.rb +0 -402
  58. data/lib/fred/FredSplit.rb +0 -84
  59. data/lib/fred/FredSplitPkg.rb +0 -180
  60. data/lib/fred/FredTest.rb +0 -606
  61. data/lib/fred/FredTrain.rb +0 -144
  62. data/lib/fred/PlotAndREval.rb +0 -480
  63. data/lib/fred/fred_config_data.rb +0 -185
  64. data/test/frprep/test_opt_parser.rb +0 -94
  65. data/test/functional/functional_test_helper.rb +0 -58
  66. data/test/functional/test_fred.rb +0 -47
  67. data/test/functional/test_frprep.rb +0 -99
  68. data/test/functional/test_rosy.rb +0 -40
metadata CHANGED
@@ -1,15 +1,29 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: shalmaneser-fred
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.2.0.rc4
4
+ version: 1.2.rc5
5
5
  platform: ruby
6
6
  authors:
7
7
  - Andrei Beliankou
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2015-09-04 00:00:00.000000000 Z
11
+ date: 2016-01-13 00:00:00.000000000 Z
12
12
  dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: shalmaneser-lib
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - '='
18
+ - !ruby/object:Gem::Version
19
+ version: 1.2.rc5
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - '='
25
+ - !ruby/object:Gem::Version
26
+ version: 1.2.rc5
13
27
  - !ruby/object:Gem::Dependency
14
28
  name: mysql
15
29
  requirement: !ruby/object:Gem::Requirement
@@ -39,31 +53,49 @@ files:
39
53
  - LICENSE.md
40
54
  - README.md
41
55
  - bin/fred
42
- - lib/fred/Baseline.rb
43
- - lib/fred/FileZipped.rb
44
- - lib/fred/FredBOWContext.rb
45
56
  - lib/fred/FredConventions.rb
46
- - lib/fred/FredDetermineTargets.rb
47
- - lib/fred/FredEval.rb
48
- - lib/fred/FredFeatureExtractors.rb
49
- - lib/fred/FredFeatures.rb
50
- - lib/fred/FredFeaturize.rb
51
- - lib/fred/FredNumTrainingSenses.rb
52
- - lib/fred/FredParameters.rb
53
- - lib/fred/FredSplit.rb
54
- - lib/fred/FredSplitPkg.rb
55
- - lib/fred/FredTest.rb
56
- - lib/fred/FredTrain.rb
57
- - lib/fred/PlotAndREval.rb
57
+ - lib/fred/abstract_context_provider.rb
58
+ - lib/fred/abstract_fred_feature_access.rb
59
+ - lib/fred/answer_key_access.rb
60
+ - lib/fred/aux_keep_writers.rb
61
+ - lib/fred/baseline.rb
62
+ - lib/fred/context_provider.rb
63
+ - lib/fred/feature_extractors.rb
64
+ - lib/fred/feature_extractors/fred_context_feature_extractor.rb
65
+ - lib/fred/feature_extractors/fred_context_pos_feature_extractor.rb
66
+ - lib/fred/feature_extractors/fred_feature_extractor.rb
67
+ - lib/fred/feature_extractors/fred_ngram_feature_extractor.rb
68
+ - lib/fred/feature_extractors/fred_syn_feature_extractor.rb
69
+ - lib/fred/feature_extractors/fred_synsem_feature_extractor.rb
70
+ - lib/fred/file_zipped.rb
71
+ - lib/fred/find_all_targets.rb
72
+ - lib/fred/find_targets_from_frames.rb
58
73
  - lib/fred/fred.rb
59
- - lib/fred/fred_config_data.rb
74
+ - lib/fred/fred_error.rb
75
+ - lib/fred/fred_eval.rb
76
+ - lib/fred/fred_feature_access.rb
77
+ - lib/fred/fred_feature_info.rb
78
+ - lib/fred/fred_featurize.rb
79
+ - lib/fred/fred_parameters.rb
80
+ - lib/fred/fred_split.rb
81
+ - lib/fred/fred_split_pkg.rb
82
+ - lib/fred/fred_test.rb
83
+ - lib/fred/fred_train.rb
84
+ - lib/fred/grammatical_function_access.rb
60
85
  - lib/fred/md5.rb
86
+ - lib/fred/meta_feature_access.rb
87
+ - lib/fred/non_contiguous_context_provider.rb
61
88
  - lib/fred/opt_parser.rb
62
- - test/frprep/test_opt_parser.rb
63
- - test/functional/functional_test_helper.rb
64
- - test/functional/test_fred.rb
65
- - test/functional/test_frprep.rb
66
- - test/functional/test_rosy.rb
89
+ - lib/fred/plot_and_r_eval.rb
90
+ - lib/fred/single_sent_context_provider.rb
91
+ - lib/fred/slide_var.rb
92
+ - lib/fred/targets.rb
93
+ - lib/fred/toggle_var.rb
94
+ - lib/fred/word_lemma_pos_ne.rb
95
+ - lib/fred/write_features_binary.rb
96
+ - lib/fred/write_features_nary.rb
97
+ - lib/fred/write_features_nary_or_binary.rb
98
+ - lib/shalmaneser/fred.rb
67
99
  homepage: https://github.com/arbox/shalmaneser
68
100
  licenses:
69
101
  - GPL-2.0
@@ -86,14 +118,9 @@ required_rubygems_version: !ruby/object:Gem::Requirement
86
118
  version: 1.3.1
87
119
  requirements: []
88
120
  rubyforge_project:
89
- rubygems_version: 2.4.5
121
+ rubygems_version: 2.5.1
90
122
  signing_key:
91
123
  specification_version: 4
92
124
  summary: FRED
93
- test_files:
94
- - test/frprep/test_opt_parser.rb
95
- - test/functional/functional_test_helper.rb
96
- - test/functional/test_fred.rb
97
- - test/functional/test_frprep.rb
98
- - test/functional/test_rosy.rb
125
+ test_files: []
99
126
  has_rdoc:
data/lib/fred/Baseline.rb DELETED
@@ -1,150 +0,0 @@
1
- # Baseline
2
- # Katrin Erk April 05
3
- #
4
- # baseline for WSD:
5
- # always assign most frequent sense
6
- # The baseline doesn't do binary classifiers.
7
-
8
- require "fred/FredConventions"
9
- require "fred/FredSplitPkg"
10
- require "fred/FredFeatures"
11
- require "fred/FredDetermineTargets"
12
-
13
- class Baseline
14
- ###
15
- # new
16
- #
17
- # get splitlog dir (if any) along with everything else
18
- # because we are only evaluating the training data
19
- # at test time
20
- #
21
- def initialize(exp, # FredConfigData object
22
- split_id = nil) # string: split ID
23
- @exp = exp
24
- @split_id = split_id
25
-
26
- # for each lemma: remember prevalent sense
27
- @lemma_to_sense = Hash.new()
28
-
29
- if @split_id
30
- split_obj = FredSplitPkg.new(@exp)
31
- end
32
-
33
- lemma_done = Hash.new()
34
-
35
- # iterate through lemmas
36
- @target_obj = Targets.new(@exp, nil, "r")
37
- unless @target_obj.targets_okay
38
- # error during initialization
39
- $stderr.puts "Error: Could not read list of known targets, bailing out."
40
- exit 1
41
- end
42
-
43
- @target_obj.get_lemmas().each { |lemmapos|
44
-
45
- if @split_id
46
- # read training split of answer keys
47
- answer_obj = AnswerKeyAccess.new(@exp, "train", lemmapos, "r", @split_id, "train")
48
- else
49
- # read full answer key file of training data
50
- answer_obj = AnswerKeyAccess.new(@exp, "train", lemmapos, "r")
51
- end
52
-
53
- count_senses = Hash.new(0)
54
-
55
- answer_obj.each { |lemma, pos, ids, sid, senses_all, senses_this|
56
- # senses_this may include more than one sense for multi-label assignment
57
- senses_this.each { |sense|
58
- count_senses[sense] += 1
59
- }
60
- }
61
-
62
- @lemma_to_sense[lemmapos] = count_senses.keys().max { |a, b|
63
- count_senses[a] <=> count_senses[b]
64
- }
65
- }
66
-
67
-
68
- @lemma = nil
69
- end
70
-
71
- ###
72
- def train(infilename)
73
- # no training here
74
- end
75
-
76
- ###
77
- def write(classifier_file)
78
- # no classifiers to write
79
- end
80
-
81
- def exists?(classifier_file)
82
- return true
83
- end
84
-
85
- def read(classifier_file)
86
- values = deconstruct_fred_classifier_filename(File.basename(classifier_file))
87
- @lemma = values["lemma"]
88
- if @lemma
89
- return true
90
- else
91
- $stderr.puts "Warning: couldn't determine lemma name in #{classifier_file}, skipping"
92
- return false
93
- end
94
- end
95
-
96
-
97
- def read_resultfile(filename)
98
- retv = Array.new()
99
- begin
100
- f = File.new(filename)
101
- rescue
102
- raise "Could not read baseline result file #{filename}"
103
- end
104
-
105
- f.each { |line|
106
- retv << [[ line.chomp(), 1.0 ]]
107
- }
108
-
109
- return retv
110
- end
111
-
112
- def apply(infilename, outfilename)
113
- # open input and output file
114
- begin
115
- out_f = File.new(outfilename, "w")
116
- rescue
117
- $stderr.puts "Error: cannot write to classification output file #{outfilename}."
118
- exit 1
119
- end
120
- begin
121
- f = File.new(infilename)
122
- rescue
123
- $stderr.puts "Error: cannot read feature file #{infilename}."
124
- exit 1
125
- end
126
-
127
- # deconstruct input filename to determine lemma
128
- unless @lemma
129
- # something went wrong in read()
130
- return false
131
- end
132
-
133
- # do we have a sense for this?
134
- unless (sense = @lemma_to_sense[@lemma])
135
- # nope: assign "NONE" (or whatever the null label is here)
136
- sense = @exp.get("negsense")
137
- unless sense
138
- sense = "NONE"
139
- end
140
- end
141
-
142
- f.each { |line|
143
- out_f.puts sense
144
- }
145
- out_f.close()
146
- f.close()
147
-
148
- return true
149
- end
150
- end
@@ -1,31 +0,0 @@
1
- class FileZipped
2
-
3
- def FileZipped.new(filename,
4
- mode = "r")
5
-
6
- # escape characters in the filename that
7
- # would make the shell hiccup on the command
8
- filename = filename.gsub(/([();:!?'`])/, 'XXSLASHXX\1')
9
- filename = filename.gsub(/XXSLASHXX/, "\\")
10
-
11
- begin
12
- case mode
13
- when "r"
14
- unless File.exists? filename
15
- raise "catchme"
16
- end
17
- return IO.popen("gunzip -c #{filename}")
18
- when "w"
19
- return IO.popen("gzip > #{filename}", "w")
20
- when "a"
21
- return IO.popen("gzip >> #{filename}", "w")
22
- else
23
- $stderr.puts "FileZipped error: only modes r, w, a are implemented. I got: #{mode}."
24
- exit 1
25
- end
26
- rescue
27
- raise "Error opening file #{filename}."
28
- end
29
- end
30
-
31
- end