shalmaneser-fred 1.2.0.rc4 → 1.2.rc5
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +47 -18
- data/bin/fred +8 -3
- data/lib/fred/FredConventions.rb +190 -189
- data/lib/fred/abstract_context_provider.rb +246 -0
- data/lib/fred/abstract_fred_feature_access.rb +43 -0
- data/lib/fred/answer_key_access.rb +130 -0
- data/lib/fred/aux_keep_writers.rb +94 -0
- data/lib/fred/baseline.rb +153 -0
- data/lib/fred/context_provider.rb +55 -0
- data/lib/fred/feature_extractors/fred_context_feature_extractor.rb +48 -0
- data/lib/fred/feature_extractors/fred_context_pos_feature_extractor.rb +48 -0
- data/lib/fred/feature_extractors/fred_feature_extractor.rb +50 -0
- data/lib/fred/feature_extractors/fred_ngram_feature_extractor.rb +65 -0
- data/lib/fred/feature_extractors/fred_syn_feature_extractor.rb +33 -0
- data/lib/fred/feature_extractors/fred_synsem_feature_extractor.rb +32 -0
- data/lib/fred/feature_extractors.rb +5 -0
- data/lib/fred/file_zipped.rb +43 -0
- data/lib/fred/find_all_targets.rb +94 -0
- data/lib/fred/find_targets_from_frames.rb +92 -0
- data/lib/fred/fred.rb +43 -40
- data/lib/fred/fred_error.rb +15 -0
- data/lib/fred/fred_eval.rb +311 -0
- data/lib/fred/fred_feature_access.rb +420 -0
- data/lib/fred/fred_feature_info.rb +56 -0
- data/lib/fred/fred_featurize.rb +525 -0
- data/lib/fred/fred_parameters.rb +190 -0
- data/lib/fred/fred_split.rb +86 -0
- data/lib/fred/fred_split_pkg.rb +189 -0
- data/lib/fred/fred_test.rb +571 -0
- data/lib/fred/fred_train.rb +125 -0
- data/lib/fred/grammatical_function_access.rb +63 -0
- data/lib/fred/md5.rb +6 -0
- data/lib/fred/meta_feature_access.rb +185 -0
- data/lib/fred/non_contiguous_context_provider.rb +532 -0
- data/lib/fred/opt_parser.rb +182 -161
- data/lib/fred/plot_and_r_eval.rb +486 -0
- data/lib/fred/single_sent_context_provider.rb +76 -0
- data/lib/fred/slide_var.rb +148 -0
- data/lib/fred/targets.rb +136 -0
- data/lib/fred/toggle_var.rb +61 -0
- data/lib/fred/word_lemma_pos_ne.rb +51 -0
- data/lib/fred/write_features_binary.rb +95 -0
- data/lib/fred/write_features_nary.rb +51 -0
- data/lib/fred/write_features_nary_or_binary.rb +51 -0
- data/lib/shalmaneser/fred.rb +1 -0
- metadata +57 -30
- data/lib/fred/Baseline.rb +0 -150
- data/lib/fred/FileZipped.rb +0 -31
- data/lib/fred/FredBOWContext.rb +0 -877
- data/lib/fred/FredDetermineTargets.rb +0 -319
- data/lib/fred/FredEval.rb +0 -312
- data/lib/fred/FredFeatureExtractors.rb +0 -322
- data/lib/fred/FredFeatures.rb +0 -1061
- data/lib/fred/FredFeaturize.rb +0 -602
- data/lib/fred/FredNumTrainingSenses.rb +0 -27
- data/lib/fred/FredParameters.rb +0 -402
- data/lib/fred/FredSplit.rb +0 -84
- data/lib/fred/FredSplitPkg.rb +0 -180
- data/lib/fred/FredTest.rb +0 -606
- data/lib/fred/FredTrain.rb +0 -144
- data/lib/fred/PlotAndREval.rb +0 -480
- data/lib/fred/fred_config_data.rb +0 -185
- data/test/frprep/test_opt_parser.rb +0 -94
- data/test/functional/functional_test_helper.rb +0 -58
- data/test/functional/test_fred.rb +0 -47
- data/test/functional/test_frprep.rb +0 -99
- data/test/functional/test_rosy.rb +0 -40
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 6f7ca2f794d9024633383d41b2092b9f250197bf
|
4
|
+
data.tar.gz: d8bc88eeda007ca746e39565709b5e3fd414f58e
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 0c81db071bbee52c8aa337cd9dfccbb7190cbc156cc2f38c580c2647196093b5e08b49170d891762ca1c0965a5e59695371a75a2b63a93cda179fd4f47ced219
|
7
|
+
data.tar.gz: e3e07f5a64ba00e6d1eca438a1106cd3a40b89167e0c318e7184c56448d7a6940a2bd0210773ff325989a2277555f016a6ecceb55f50943f10ce3b96bb0531c4
|
data/README.md
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
#
|
1
|
+
# SHALMANESER
|
2
2
|
|
3
3
|
[RubyGems](http://rubygems.org/gems/shalmaneser) |
|
4
4
|
[Shalmaneser's Project Page](http://bu.chsta.be/projects/shalmaneser/) |
|
@@ -7,9 +7,9 @@
|
|
7
7
|
|
8
8
|
|
9
9
|
[![Gem Version](https://img.shields.io/gem/v/shalmaneser.svg)](https://rubygems.org/gems/shalmaneser)
|
10
|
-
[![Gem Version](https://img.shields.io/gem/v/frprep.svg")](https://rubygems.org/gems/
|
11
|
-
[![Gem Version](https://img.shields.io/gem/v/fred.svg")](https://rubygems.org/gems/fred)
|
12
|
-
[![Gem Version](https://img.shields.io/gem/v/rosy.svg")](https://rubygems.org/gems/rosy)
|
10
|
+
[![Gem Version](https://img.shields.io/gem/v/shalmaneser-prep.svg)](https://rubygems.org/gems/shalmaneser-prep)
|
11
|
+
[![Gem Version](https://img.shields.io/gem/v/shalmaneser-fred.svg)](https://rubygems.org/gems/shalmaneser-fred)
|
12
|
+
[![Gem Version](https://img.shields.io/gem/v/shalmaneser-rosy.svg)](https://rubygems.org/gems/shalmaneser-rosy)
|
13
13
|
|
14
14
|
|
15
15
|
[![License GPL 2](http://img.shields.io/badge/License-GPL%202-green.svg)](http://www.gnu.org/licenses/gpl-2.0.txt)
|
@@ -17,12 +17,44 @@
|
|
17
17
|
[![Code Climate](https://img.shields.io/codeclimate/github/arbox/shalmaneser.svg)](https://codeclimate.com/github/arbox/shalmaneser)
|
18
18
|
[![Dependency Status](https://img.shields.io/gemnasium/arbox/shalmaneser.svg)](https://gemnasium.com/arbox/shalmaneser)
|
19
19
|
|
20
|
+
[SHALMANESER](http://www.coli.uni-saarland.de/projects/salsa/shal/) is a SHALlow seMANtic parSER.
|
21
|
+
|
22
|
+
The name Shalmaneser is borrowed from John Brunner. He describes in his novel
|
23
|
+
"Stand on Zanzibar" an all knowing supercomputer baptized Shalmaneser.
|
24
|
+
|
25
|
+
Shalmaneser also has other origins like the king [Shalmaneser III](https://en.wikipedia.org/wiki/Shalmaneser_III).
|
26
|
+
|
27
|
+
> "SCANALYZER is the one single, the ONLY study of the news in depth
|
28
|
+
> that’s processed by General Technics’ famed computer Shalmaneser,
|
29
|
+
> who sees all, hears all, knows all save only that which YOU, Mr. and Mrs.
|
30
|
+
> Everywhere, wish to keep to yourselves." <br/>
|
31
|
+
> John Brunner (1968) "Stand on Zanzibar"
|
32
|
+
|
33
|
+
> But Shalmaneser is a Micryogenic® computer bathed in liquid helium and it’s cold in his vault. <br/>
|
34
|
+
> John Brunner (1968) "Stand on Zanzibar"
|
35
|
+
|
36
|
+
> “Of course not. Shalmaneser’s main task is to achieve the impossible again, a routine undertaking here at GT.” <br/>
|
37
|
+
> John Brunner (1968) "Stand on Zanzibar"
|
38
|
+
|
39
|
+
> “They programmed Shalmaneser with the formula for this stiffener, see, and…” <br/>
|
40
|
+
> John Brunner (1968) "Stand on Zanzibar"
|
41
|
+
|
42
|
+
> What am I going to do now? <br/>
|
43
|
+
> “All right, Shalmaneser!” <br/>
|
44
|
+
> John Brunner (1968) "Stand on Zanzibar"
|
45
|
+
|
46
|
+
> Shalmaneser is a Micryogenic® computer bathed in liquid helium and there’s no sign of Teresa. <br/>
|
47
|
+
> John Brunner (1968) "Stand on Zanzibar"
|
48
|
+
|
49
|
+
> Bathed in his currents of liquid helium, self-contained, immobile, vastly well informed by every mechanical sense: Shalmaneser. <br/>
|
50
|
+
> John Brunner (1968) "Stand on Zanzibar"
|
51
|
+
|
20
52
|
## Description
|
21
53
|
|
22
54
|
Please be careful, the whole thing is under construction! For now Shalmaneser is not intended to run on Windows systems since it heavily uses system calls for external invocations.
|
23
55
|
Current versions of Shalmaneser have been tested on Linux only (other *NIX testers are welcome!).
|
24
56
|
|
25
|
-
Shalmaneser is a supervised learning toolbox for shallow semantic parsing, i.e. the automatic assignment of semantic classes and roles to text. This technique is often called SRL (Semantic Role Labelling). The system was developed for Frame Semantics; thus we use Frame Semantics terminology and call the classes frames and the roles frame elements. However, the architecture is reasonably general, and with a certain amount of adaption, Shalmaneser should be usable for other paradigms (e.g., PropBank roles) as well. Shalmaneser caters both for end users, and for researchers.
|
57
|
+
Shalmaneser is a supervised learning toolbox for shallow semantic parsing, i.e. the automatic assignment of semantic classes and roles to text. This technique is often called [SRL](https://en.wikipedia.org/wiki/Semantic_role_labeling) (Semantic Role Labelling). The system was developed for Frame Semantics; thus we use Frame Semantics terminology and call the classes frames and the roles frame elements. However, the architecture is reasonably general, and with a certain amount of adaptation, Shalmaneser should be usable for other paradigms (e.g., PropBank roles) as well. Shalmaneser caters both for end users and for researchers.
|
26
58
|
|
27
59
|
For end users, we provide a simple end user mode which can simply apply the pre-trained classifiers
|
28
60
|
for [English](http://www.coli.uni-saarland.de/projects/salsa/shal/index.php?nav=download) (FrameNet 1.3 annotation / Collins parser)
|
@@ -34,32 +66,27 @@ For researchers interested in investigating shallow semantic parsing, our system
|
|
34
66
|
|
35
67
|
## Origin
|
36
68
|
|
37
|
-
The original version of Shalmaneser was written by Sebastian Padó, Katrin Erk and others during their work in the SALSA Project.
|
69
|
+
The original version of Shalmaneser was written by Sebastian Padó, Katrin Erk, Alexander Koller, Ines Rehbein, Aljoscha Burchardt and others during their work in the SALSA Project.
|
38
70
|
|
39
71
|
You can find original versions of Shalmaneser up to ``1.1`` on the [SALSA](http://www.coli.uni-saarland.de/projects/salsa/shal/) project page.
|
40
72
|
|
41
73
|
## Publications on Shalmaneser
|
42
74
|
|
43
75
|
- K. Erk and S. Padó: Shalmaneser - a flexible toolbox for semantic role assignment. Proceedings of LREC 2006, Genoa, Italy. [Click here for details](http://www.nlpado.de/~sebastian/pub/papers/lrec06_erk.pdf).
|
76
|
+
|
44
77
|
- TODO: add other works
|
45
78
|
|
46
79
|
## Documentation
|
47
80
|
|
48
|
-
The project documentation can be found in our [doc](https://github.com/arbox/shalmaneser/blob/
|
81
|
+
The project documentation can be found in our [doc](https://github.com/arbox/shalmaneser/blob/master/doc/index.md) folder.
|
49
82
|
|
50
83
|
## Development
|
51
84
|
|
52
|
-
We are working now on
|
53
|
-
|
54
|
-
- ``dev`` - our development branch incorporating actual changes, for now pointing to ``1.2``;
|
55
|
-
|
56
|
-
- ``1.2`` - intermediate target;
|
57
|
-
|
58
|
-
- ``2.0`` - final target.
|
85
|
+
We now work only on the `master` branch. For intermediate versions, see the corresponding tags.
|
59
86
|
|
60
87
|
## Installation
|
61
88
|
|
62
|
-
See the installation instructions in the [doc](https://github.com/arbox/shalmaneser/blob/
|
89
|
+
See the installation instructions in the [doc](https://github.com/arbox/shalmaneser/blob/master/doc/index.md#installation) folder.
|
63
90
|
|
64
91
|
### Tokenizers
|
65
92
|
|
@@ -75,7 +102,7 @@ See the installation instructions in the [doc](https://github.com/arbox/shalmane
|
|
75
102
|
|
76
103
|
### Parsers
|
77
104
|
|
78
|
-
- [BerkeleyParser](https://
|
105
|
+
- [BerkeleyParser](https://github.com/slavpetrov/berkeleyparser)
|
79
106
|
- [Stanford Parser](http://nlp.stanford.edu/software/lex-parser.shtml)
|
80
107
|
- [Collins Parser](http://www.cs.columbia.edu/~mcollins/code.html)
|
81
108
|
|
@@ -86,8 +113,10 @@ See the installation instructions in the [doc](https://github.com/arbox/shalmane
|
|
86
113
|
|
87
114
|
## License
|
88
115
|
|
89
|
-
|
116
|
+
Shalmaneser is released under the `GPL v. 2.0` license, as chosen by the original authors.
|
117
|
+
|
118
|
+
For a local copy of the full license text see the [LICENSE](LICENSE.md) file.
|
90
119
|
|
91
120
|
## Contributing
|
92
121
|
|
93
|
-
|
122
|
+
Feel free to contact me via GitHub. Open an issue if you see problems or need help.
|
data/bin/fred
CHANGED
@@ -10,7 +10,12 @@
|
|
10
10
|
require 'fred/opt_parser'
|
11
11
|
require 'fred/fred'
|
12
12
|
|
13
|
-
|
13
|
+
begin
|
14
|
+
options = ::Shalmaneser::Fred::OptParser.parse(ARGV)
|
14
15
|
|
15
|
-
fred = Fred::Fred.new(options)
|
16
|
-
fred.assign
|
16
|
+
fred = ::Shalmaneser::Fred::Fred.new(options)
|
17
|
+
fred.assign
|
18
|
+
rescue => e
|
19
|
+
$stderr.puts 'Fred cannot serve you!'
|
20
|
+
$stderr.puts e.message, e.backtrace
|
21
|
+
end
|
data/lib/fred/FredConventions.rb
CHANGED
@@ -4,229 +4,230 @@
|
|
4
4
|
# several small things that should be uniform
|
5
5
|
# throughout the system
|
6
6
|
|
7
|
-
require
|
7
|
+
require 'monkey_patching/file.rb'
|
8
|
+
require 'fred/answer_key_access'
|
8
9
|
|
9
|
-
|
10
|
-
|
10
|
+
module Shalmaneser
|
11
|
+
module Fred
|
11
12
|
|
12
|
-
|
13
|
-
# joining and breaking up senses
|
14
|
-
def fred_join_senses(senses)
|
15
|
-
return senses.sort().join("++")
|
16
|
-
end
|
17
|
-
|
18
|
-
def fred_split_sense(joined_senses)
|
19
|
-
return joined_senses.split("++")
|
20
|
-
end
|
13
|
+
module_function
|
21
14
|
|
22
|
-
|
23
|
-
|
24
|
-
#
|
25
|
-
#
|
26
|
-
#
|
27
|
-
#
|
28
|
-
# if is_existing == existing, the directory is checked for existence,
|
29
|
-
# if is_existing == new, it is created if necessary
|
30
|
-
#
|
31
|
-
# returns: a string
|
32
|
-
def fred_dirname(exp, # FredConfigData object
|
33
|
-
maindir, # string: main part of directory name
|
34
|
-
subdir, # string: subpart of directory name
|
35
|
-
is_existing = "existing") # string: "existing" or "new", default: existing
|
36
|
-
|
37
|
-
case is_existing
|
38
|
-
when "existing"
|
39
|
-
return File.existing_dir(exp.get("fred_directory"),
|
40
|
-
exp.get("experiment_ID"),
|
41
|
-
maindir,
|
42
|
-
subdir)
|
43
|
-
when "new"
|
44
|
-
return File.new_dir(exp.get("fred_directory"),
|
45
|
-
exp.get("experiment_ID"),
|
46
|
-
maindir,
|
47
|
-
subdir)
|
48
|
-
else
|
49
|
-
raise "Shouldn't be here: #{is_existing}"
|
50
|
-
end
|
51
|
-
end
|
15
|
+
def determine_training_senses(lemma, exp, lemmas_and_senses_obj, split_id)
|
16
|
+
if split_id
|
17
|
+
# oh no, we're splitting the dataset into random training and test portions.
|
18
|
+
# this means that we actually have to look into the training part of the data to
|
19
|
+
# determine the number of training senses
|
52
20
|
|
53
|
-
|
54
|
-
# filenames for feature files
|
55
|
-
def fred_feature_filename(lemma, sense = nil,
|
56
|
-
do_binary = false)
|
57
|
-
if do_binary
|
58
|
-
return "fred.features.#{lemma}.SENSE.#{sense}"
|
59
|
-
else
|
60
|
-
return "fred.features.#{lemma}"
|
61
|
-
end
|
62
|
-
end
|
21
|
+
senses_hash = {}
|
63
22
|
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
end
|
23
|
+
reader = AnswerKeyAccess.new(exp, "train", lemma, "r", split_id, "train")
|
24
|
+
reader.each do |_lemma, _pos, _ids, _sids, gold_senses, _transf_gold_senses|
|
25
|
+
gold_senses.each { |s| senses_hash[s] = true }
|
26
|
+
end
|
69
27
|
|
70
|
-
|
71
|
-
# deconstruct split filename
|
72
|
-
# returns: lemma
|
73
|
-
def deconstruct_fred_split_filename(filename)
|
74
|
-
basename = File.basename(filename)
|
75
|
-
if basename =~ /^fred\.split\.(.*)/
|
76
|
-
return $1
|
77
|
-
else
|
78
|
-
return nil
|
79
|
-
end
|
80
|
-
end
|
28
|
+
return senses_hash.keys
|
81
29
|
|
82
|
-
|
83
|
-
#
|
84
|
-
#
|
85
|
-
#
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
if basename =~ /^fred\.features\.(.*)\.SENSE\.(.*)$/
|
94
|
-
retv["lemma"] = $1
|
95
|
-
retv["sense"] = $2
|
96
|
-
elsif basename =~ /^fred\.features\.(.*)/
|
97
|
-
# fred.features.#{lemma}
|
98
|
-
retv["lemma"] = $1
|
99
|
-
|
100
|
-
else
|
101
|
-
# complete mismatch
|
102
|
-
return nil
|
30
|
+
else
|
31
|
+
# we're using separate test data.
|
32
|
+
# so we can just look up the number of training senses
|
33
|
+
# in the lemmas_and_senses object
|
34
|
+
senses = lemmas_and_senses_obj.get_senses(lemma)
|
35
|
+
if senses
|
36
|
+
return senses
|
37
|
+
else
|
38
|
+
return []
|
39
|
+
end
|
40
|
+
end
|
103
41
|
end
|
104
42
|
|
105
|
-
return retv
|
106
|
-
end
|
107
43
|
|
108
|
-
|
109
|
-
#
|
110
|
-
|
111
|
-
|
112
|
-
|
44
|
+
###
|
45
|
+
# joining and breaking up senses
|
46
|
+
# @note Used only in FredFeatures.
|
47
|
+
def fred_join_senses(senses)
|
48
|
+
senses.sort.join("++")
|
49
|
+
end
|
113
50
|
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
|
51
|
+
# @note Used only in FredEval.
|
52
|
+
def fred_split_sense(joined_senses)
|
53
|
+
joined_senses.split("++")
|
54
|
+
end
|
118
55
|
|
119
|
-
|
120
|
-
|
56
|
+
###
|
57
|
+
# fred_dirname
|
58
|
+
#
|
59
|
+
# @note Used on multiple positions.
|
60
|
+
# constructs a directory name:
|
61
|
+
# fred data directory / experiment ID / maindir / subdir
|
62
|
+
#
|
63
|
+
# if is_existing == existing, the directory is checked for existence,
|
64
|
+
# if is_existing == new, it is created if necessary
|
65
|
+
#
|
66
|
+
# @return [String]
|
67
|
+
def fred_dirname(exp, # FredConfigData object
|
68
|
+
maindir, # string: main part of directory name
|
69
|
+
subdir, # string: subpart of directory name
|
70
|
+
is_existing = "existing") # string: "existing" or "new", default: existing
|
71
|
+
|
72
|
+
case is_existing
|
73
|
+
when "existing"
|
74
|
+
return File.existing_dir(exp.get("fred_directory"),
|
75
|
+
exp.get("experiment_ID"),
|
76
|
+
maindir,
|
77
|
+
subdir)
|
78
|
+
when "new"
|
79
|
+
return File.new_dir(exp.get("fred_directory"),
|
80
|
+
exp.get("experiment_ID"),
|
81
|
+
maindir,
|
82
|
+
subdir)
|
83
|
+
else
|
84
|
+
raise "Shouldn't be here: #{is_existing}"
|
85
|
+
end
|
86
|
+
end
|
121
87
|
|
122
|
-
|
123
|
-
|
88
|
+
####
|
89
|
+
# filenames for feature files
|
90
|
+
# @note Used on multiple points.
|
91
|
+
def fred_feature_filename(lemma, sense = nil,
|
92
|
+
do_binary = false)
|
93
|
+
if do_binary
|
94
|
+
return "fred.features.#{lemma}.SENSE.#{sense}"
|
124
95
|
else
|
125
|
-
return
|
96
|
+
return "fred.features.#{lemma}"
|
126
97
|
end
|
98
|
+
end
|
127
99
|
|
128
|
-
|
129
|
-
|
130
|
-
|
131
|
-
|
100
|
+
###
|
101
|
+
# deconstruct split filename
|
102
|
+
# returns: lemma
|
103
|
+
# @note Not used anywhere.
|
104
|
+
def deconstruct_fred_split_filename(filename)
|
105
|
+
basename = File.basename(filename)
|
106
|
+
if basename =~ /^fred\.split\.(.*)/
|
107
|
+
return $1
|
132
108
|
else
|
133
|
-
return
|
109
|
+
return nil
|
134
110
|
end
|
135
111
|
end
|
136
|
-
end
|
137
112
|
|
138
|
-
###
|
139
|
-
#
|
140
|
-
|
141
|
-
|
142
|
-
|
143
|
-
|
144
|
-
|
113
|
+
###
|
114
|
+
# deconstruct feature file name
|
115
|
+
# returns: hash with keys
|
116
|
+
# "lemma"
|
117
|
+
# "sense"
|
118
|
+
# @note Used only in FredFeatures.
|
119
|
+
def deconstruct_fred_feature_filename(filename)
|
120
|
+
basename = File.basename(filename)
|
121
|
+
retv = {}
|
122
|
+
|
123
|
+
# binary:
|
124
|
+
# fred.features.#{lemma}.SENSE.#{sense}
|
125
|
+
if basename =~ /^fred\.features\.(.*)\.SENSE\.(.*)$/
|
126
|
+
retv["lemma"] = $1
|
127
|
+
retv["sense"] = $2
|
128
|
+
elsif basename =~ /^fred\.features\.(.*)/
|
129
|
+
# fred.features.#{lemma}
|
130
|
+
retv["lemma"] = $1
|
131
|
+
|
132
|
+
else
|
133
|
+
# complete mismatch
|
134
|
+
return nil
|
135
|
+
end
|
136
|
+
|
137
|
+
return retv
|
145
138
|
end
|
146
|
-
end
|
147
139
|
|
148
|
-
|
149
|
-
|
150
|
-
|
151
|
-
|
152
|
-
|
153
|
-
elsif filename =~ /^fred\.classif\.(.*)\.LEMMA\.(.*)$/
|
154
|
-
retv["lemma"] = $2
|
140
|
+
####
|
141
|
+
# filename for answer key files
|
142
|
+
# @note Used only in FredFeatures.
|
143
|
+
def fred_answerkey_filename(lemma)
|
144
|
+
return "fred.answerkey.#{lemma}"
|
155
145
|
end
|
156
|
-
return retv
|
157
|
-
end
|
158
146
|
|
159
|
-
###
|
160
|
-
#
|
161
|
-
|
162
|
-
|
163
|
-
|
147
|
+
###
|
148
|
+
# classifier directory
|
149
|
+
# @note Used on multiple points.
|
150
|
+
def fred_classifier_directory(exp, # FredConfigData object
|
151
|
+
splitID = nil) # string or nil
|
164
152
|
|
165
|
-
|
166
|
-
#
|
167
|
-
# a separator character
|
168
|
-
#
|
169
|
-
# fred_lemmapos_combine: take two strings, return combined string
|
170
|
-
# if POS is nil, returns lemma<separator character>
|
171
|
-
# fred_lemmapos_separate: take one string, return two strings
|
172
|
-
# if no POS could be retrieved, returns nil as POS and the whole string as lemma
|
173
|
-
def fred_lemmapos_combine(lemma, # string
|
174
|
-
pos) # string
|
175
|
-
return lemma.to_s + "." + pos.to_s.gsub(/\./, "DOT")
|
176
|
-
end
|
153
|
+
if exp.get("classifier_dir")
|
154
|
+
# user-specified classifier directory
|
177
155
|
|
178
|
-
|
179
|
-
|
180
|
-
|
181
|
-
|
182
|
-
|
183
|
-
else
|
184
|
-
# no POS found, treat all of lemmapos as lemma
|
185
|
-
return [ lemmapos, nil ]
|
186
|
-
end
|
187
|
-
end
|
188
|
-
end
|
156
|
+
if splitID
|
157
|
+
return File.new_dir(exp.get("classifier_dir"), splitID)
|
158
|
+
else
|
159
|
+
return File.new_dir(exp.get("classifier_dir"))
|
160
|
+
end
|
189
161
|
|
190
|
-
|
191
|
-
#
|
192
|
-
|
193
|
-
|
194
|
-
|
195
|
-
|
196
|
-
|
197
|
-
#
|
198
|
-
# as a tuple
|
199
|
-
#
|
200
|
-
# WARNING: word and lemma are turned to lowercase
|
201
|
-
module WordLemmaPosNe
|
202
|
-
def word_lemma_pos_ne(syn_obj, # SynNode object
|
203
|
-
i) # SynInterpreter class
|
204
|
-
unless syn_obj.is_terminal?
|
205
|
-
$stderr.puts "Featurization warning: unexpectedly received non-terminal"
|
206
|
-
return [ nil, nil, nil, nil ]
|
162
|
+
else
|
163
|
+
# my classifier directory
|
164
|
+
if splitID
|
165
|
+
return fred_dirname(exp, "classifiers", splitID, "new")
|
166
|
+
else
|
167
|
+
return fred_dirname(exp, "classifiers", "all", "new")
|
168
|
+
end
|
207
169
|
end
|
170
|
+
end
|
208
171
|
|
209
|
-
|
210
|
-
|
211
|
-
|
172
|
+
###
|
173
|
+
# classifier file
|
174
|
+
# @note Used on multiple points.
|
175
|
+
def fred_classifier_filename(classifier, lemma, sense = nil)
|
176
|
+
if sense
|
177
|
+
return "fred.classif.#{classifier}.LEMMA.#{lemma}.SENSE.#{sense}"
|
178
|
+
else
|
179
|
+
return "fred.classif.#{classifier}.LEMMA.#{lemma}"
|
212
180
|
end
|
181
|
+
end
|
213
182
|
|
214
|
-
|
215
|
-
|
216
|
-
|
217
|
-
|
218
|
-
if
|
219
|
-
lemma
|
183
|
+
# @note Used only in Baseline.
|
184
|
+
def deconstruct_fred_classifier_filename(filename)
|
185
|
+
retv = {}
|
186
|
+
|
187
|
+
if filename =~ /^fred\.classif\.(.*)\.LEMMA\.(.*)\.SENSE\.(.*)$/
|
188
|
+
retv["lemma"] = $2
|
189
|
+
retv["sense"] = $3
|
190
|
+
elsif filename =~ /^fred\.classif\.(.*)\.LEMMA\.(.*)$/
|
191
|
+
retv["lemma"] = $2
|
220
192
|
end
|
221
193
|
|
222
|
-
|
194
|
+
retv
|
195
|
+
end
|
223
196
|
|
224
|
-
|
225
|
-
|
226
|
-
|
227
|
-
|
197
|
+
###
|
198
|
+
# result file
|
199
|
+
# @note Used on multiple points.
|
200
|
+
def fred_result_filename(lemma)
|
201
|
+
"fred.result.#{lemma.gsub(/\./, "_")}"
|
202
|
+
end
|
228
203
|
|
229
|
-
|
204
|
+
##########
|
205
|
+
# lemma and POS: combine into string separated by
|
206
|
+
# a separator character
|
207
|
+
#
|
208
|
+
# fred_lemmapos_combine: take two strings, return combined string
|
209
|
+
# if POS is nil, returns lemma<separator character>
|
210
|
+
# fred_lemmapos_separate: take one string, return two strings
|
211
|
+
# if no POS could be retrieved, returns nil as POS and the whole string as lemma
|
212
|
+
# @param lemma [String]
|
213
|
+
# @param pos [String]
|
214
|
+
# @note Used on multiple points.
|
215
|
+
def fred_lemmapos_combine(lemma, pos)
|
216
|
+
lemma.to_s + "." + pos.to_s.gsub(/\./, "DOT")
|
230
217
|
end
|
231
|
-
end
|
232
218
|
|
219
|
+
###
|
220
|
+
# @param lemmapos [String]
|
221
|
+
# @note Used only in FredDetermineTargets.
|
222
|
+
def fred_lemmapos_separate(lemmapos)
|
223
|
+
pieces = lemmapos.split(".")
|
224
|
+
|
225
|
+
if pieces.length > 1
|
226
|
+
return [pieces[0..-2].join("."), pieces[-1]]
|
227
|
+
else
|
228
|
+
# no POS found, treat all of lemmapos as lemma
|
229
|
+
return [lemmapos, nil]
|
230
|
+
end
|
231
|
+
end
|
232
|
+
end
|
233
|
+
end
|