shalmaneser-fred 1.2.0.rc4 → 1.2.rc5

Files changed (68)
  1. checksums.yaml +4 -4
  2. data/README.md +47 -18
  3. data/bin/fred +8 -3
  4. data/lib/fred/FredConventions.rb +190 -189
  5. data/lib/fred/abstract_context_provider.rb +246 -0
  6. data/lib/fred/abstract_fred_feature_access.rb +43 -0
  7. data/lib/fred/answer_key_access.rb +130 -0
  8. data/lib/fred/aux_keep_writers.rb +94 -0
  9. data/lib/fred/baseline.rb +153 -0
  10. data/lib/fred/context_provider.rb +55 -0
  11. data/lib/fred/feature_extractors/fred_context_feature_extractor.rb +48 -0
  12. data/lib/fred/feature_extractors/fred_context_pos_feature_extractor.rb +48 -0
  13. data/lib/fred/feature_extractors/fred_feature_extractor.rb +50 -0
  14. data/lib/fred/feature_extractors/fred_ngram_feature_extractor.rb +65 -0
  15. data/lib/fred/feature_extractors/fred_syn_feature_extractor.rb +33 -0
  16. data/lib/fred/feature_extractors/fred_synsem_feature_extractor.rb +32 -0
  17. data/lib/fred/feature_extractors.rb +5 -0
  18. data/lib/fred/file_zipped.rb +43 -0
  19. data/lib/fred/find_all_targets.rb +94 -0
  20. data/lib/fred/find_targets_from_frames.rb +92 -0
  21. data/lib/fred/fred.rb +43 -40
  22. data/lib/fred/fred_error.rb +15 -0
  23. data/lib/fred/fred_eval.rb +311 -0
  24. data/lib/fred/fred_feature_access.rb +420 -0
  25. data/lib/fred/fred_feature_info.rb +56 -0
  26. data/lib/fred/fred_featurize.rb +525 -0
  27. data/lib/fred/fred_parameters.rb +190 -0
  28. data/lib/fred/fred_split.rb +86 -0
  29. data/lib/fred/fred_split_pkg.rb +189 -0
  30. data/lib/fred/fred_test.rb +571 -0
  31. data/lib/fred/fred_train.rb +125 -0
  32. data/lib/fred/grammatical_function_access.rb +63 -0
  33. data/lib/fred/md5.rb +6 -0
  34. data/lib/fred/meta_feature_access.rb +185 -0
  35. data/lib/fred/non_contiguous_context_provider.rb +532 -0
  36. data/lib/fred/opt_parser.rb +182 -161
  37. data/lib/fred/plot_and_r_eval.rb +486 -0
  38. data/lib/fred/single_sent_context_provider.rb +76 -0
  39. data/lib/fred/slide_var.rb +148 -0
  40. data/lib/fred/targets.rb +136 -0
  41. data/lib/fred/toggle_var.rb +61 -0
  42. data/lib/fred/word_lemma_pos_ne.rb +51 -0
  43. data/lib/fred/write_features_binary.rb +95 -0
  44. data/lib/fred/write_features_nary.rb +51 -0
  45. data/lib/fred/write_features_nary_or_binary.rb +51 -0
  46. data/lib/shalmaneser/fred.rb +1 -0
  47. metadata +57 -30
  48. data/lib/fred/Baseline.rb +0 -150
  49. data/lib/fred/FileZipped.rb +0 -31
  50. data/lib/fred/FredBOWContext.rb +0 -877
  51. data/lib/fred/FredDetermineTargets.rb +0 -319
  52. data/lib/fred/FredEval.rb +0 -312
  53. data/lib/fred/FredFeatureExtractors.rb +0 -322
  54. data/lib/fred/FredFeatures.rb +0 -1061
  55. data/lib/fred/FredFeaturize.rb +0 -602
  56. data/lib/fred/FredNumTrainingSenses.rb +0 -27
  57. data/lib/fred/FredParameters.rb +0 -402
  58. data/lib/fred/FredSplit.rb +0 -84
  59. data/lib/fred/FredSplitPkg.rb +0 -180
  60. data/lib/fred/FredTest.rb +0 -606
  61. data/lib/fred/FredTrain.rb +0 -144
  62. data/lib/fred/PlotAndREval.rb +0 -480
  63. data/lib/fred/fred_config_data.rb +0 -185
  64. data/test/frprep/test_opt_parser.rb +0 -94
  65. data/test/functional/functional_test_helper.rb +0 -58
  66. data/test/functional/test_fred.rb +0 -47
  67. data/test/functional/test_frprep.rb +0 -99
  68. data/test/functional/test_rosy.rb +0 -40
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
  ---
  SHA1:
- metadata.gz: e1795de4d92cea5dee25e6840fc1080161aa1d6e
- data.tar.gz: 8933ad415fc12fef76184e68e28757b2c6f79ec5
+ metadata.gz: 6f7ca2f794d9024633383d41b2092b9f250197bf
+ data.tar.gz: d8bc88eeda007ca746e39565709b5e3fd414f58e
  SHA512:
- metadata.gz: 7efd1551dc7e902b2fed0dd717f9eb0b9ac7aa2c010ab2bf91472934f612c066b254175f7feac7d885f8953a8979872203c8f0d6eb040253949aea0090b98eb6
- data.tar.gz: 4b46a404e0400483233cb196b3f2a41759db2c98936062a86a097bd404a7759884b4046b5f62f6223ad56d7c62599204238fdcf8e4852f2df59f091faf776822
+ metadata.gz: 0c81db071bbee52c8aa337cd9dfccbb7190cbc156cc2f38c580c2647196093b5e08b49170d891762ca1c0965a5e59695371a75a2b63a93cda179fd4f47ced219
+ data.tar.gz: e3e07f5a64ba00e6d1eca438a1106cd3a40b89167e0c318e7184c56448d7a6940a2bd0210773ff325989a2277555f016a6ecceb55f50943f10ce3b96bb0531c4
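The block above only records the new digests of the repackaged gem. If you want to check a locally unpacked copy against these values, here is a minimal sketch using Ruby's standard Digest and YAML libraries; the file paths are assumptions about where `metadata.gz` and `data.tar.gz` were extracted from the `.gem` archive.

```ruby
require 'digest'
require 'yaml'

# Assumed layout: checksums.yaml, metadata.gz and data.tar.gz
# sit in the current directory after unpacking the .gem file.
checksums = YAML.load_file('checksums.yaml')

%w[metadata.gz data.tar.gz].each do |name|
  sha1_ok   = Digest::SHA1.file(name).hexdigest   == checksums['SHA1'][name]
  sha512_ok = Digest::SHA512.file(name).hexdigest == checksums['SHA512'][name]
  puts "#{name}: SHA1 #{sha1_ok ? 'ok' : 'MISMATCH'}, SHA512 #{sha512_ok ? 'ok' : 'MISMATCH'}"
end
```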
data/README.md CHANGED
@@ -1,4 +1,4 @@
- # [SHALMANESER - a SHALlow seMANtic parSER](http://www.coli.uni-saarland.de/projects/salsa/shal/)
+ # SHALMANESER

  [RubyGems](http://rubygems.org/gems/shalmaneser) |
  [Shalmanesers Project Page](http://bu.chsta.be/projects/shalmaneser/) |
@@ -7,9 +7,9 @@


  [![Gem Version](https://img.shields.io/gem/v/shalmaneser.svg")](https://rubygems.org/gems/shalmaneser)
- [![Gem Version](https://img.shields.io/gem/v/frprep.svg")](https://rubygems.org/gems/frprep)
- [![Gem Version](https://img.shields.io/gem/v/fred.svg")](https://rubygems.org/gems/fred)
- [![Gem Version](https://img.shields.io/gem/v/rosy.svg")](https://rubygems.org/gems/rosy)
+ [![Gem Version](https://img.shields.io/gem/v/frprep.svg")](https://rubygems.org/gems/shalmaneser-prep)
+ [![Gem Version](https://img.shields.io/gem/v/fred.svg")](https://rubygems.org/gems/shalmaneser-fred)
+ [![Gem Version](https://img.shields.io/gem/v/rosy.svg")](https://rubygems.org/gems/shalmaneser-rosy)


  [![License GPL 2](http://img.shields.io/badge/License-GPL%202-green.svg)](http://www.gnu.org/licenses/gpl-2.0.txt)
@@ -17,12 +17,44 @@
  [![Code Climate](https://img.shields.io/codeclimate/github/arbox/shalmaneser.svg")](https://codeclimate.com/github/arbox/shalmaneser)
  [![Dependency Status](https://img.shields.io/gemnasium/arbox/shalmaneser.svg")](https://gemnasium.com/arbox/shalmaneser)

+ [SHALMANESER](http://www.coli.uni-saarland.de/projects/salsa/shal/) is a SHALlow seMANtic parSER.
+
+ The name Shalmaneser is borrowed from John Brunner. He describes in his novel
+ "Stand on Zanzibar" an all knowing supercomputer baptized Shalmaneser.
+
+ Shalmaneser also has other origins like the king [Shalmaneser III](https://en.wikipedia.org/wiki/Shalmaneser_III).
+
+ > "SCANALYZER is the one single, the ONLY study of the news in depth
+ > that’s processed by General Technics’ famed computer Shalmaneser,
+ > who sees all, hears all, knows all save only that which YOU, Mr. and Mrs.
+ > Everywhere, wish to keep to yourselves." <br/>
+ > John Brunner (1968) "Stand on Zanzibar"
+
+ > But Shalmaneser is a Micryogenic® computer bathed in liquid helium and it’s cold in his vault. <br/>
+ > John Brunner (1968) "Stand on Zanzibar"
+
+ > “Of course not. Shalmaneser’s main task is to achieve the impossible again, a routine undertaking here at GT.” <br/>
+ > John Brunner (1968) "Stand on Zanzibar"
+
+ > “They programmed Shalmaneser with the formula for this stiffener, see, and…” <br/>
+ > John Brunner (1968) "Stand on Zanzibar"
+
+ > What am I going to do now? <br/>
+ > “All right, Shalmaneser!” <br/>
+ > John Brunner (1968) "Stand on Zanzibar"
+
+ > Shalmaneser is a Micryogenic® computer bathed in liquid helium and there’s no sign of Teresa. <br/>
+ > John Brunner (1968) "Stand on Zanzibar"
+
+ > Bathed in his currents of liquid helium, self-contained, immobile, vastly well informed by every mechanical sense: Shalmaneser. <br/>
+ > John Brunner (1968) "Stand on Zanzibar"
+
  ## Description

  Please be careful, the whole thing is under construction! For now Shalmaneser it not intended to run on Windows systems since it heavily uses system calls for external invocations.
  Current versions of Shalmaneser have been tested on Linux only (other *NIX testers are welcome!).

- Shalmaneser is a supervised learning toolbox for shallow semantic parsing, i.e. the automatic assignment of semantic classes and roles to text. This technique is often called SRL (Semantic Role Labelling). The system was developed for Frame Semantics; thus we use Frame Semantics terminology and call the classes frames and the roles frame elements. However, the architecture is reasonably general, and with a certain amount of adaption, Shalmaneser should be usable for other paradigms (e.g., PropBank roles) as well. Shalmaneser caters both for end users, and for researchers.
+ Shalmaneser is a supervised learning toolbox for shallow semantic parsing, i.e. the automatic assignment of semantic classes and roles to text. This technique is often called [SRL](https://en.wikipedia.org/wiki/Semantic_role_labeling) (Semantic Role Labelling). The system was developed for Frame Semantics; thus we use Frame Semantics terminology and call the classes frames and the roles frame elements. However, the architecture is reasonably general, and with a certain amount of adaption, Shalmaneser should be usable for other paradigms (e.g., PropBank roles) as well. Shalmaneser caters both for end users, and for researchers.

  For end users, we provide a simple end user mode which can simply apply the pre-trained classifiers
  for [English](http://www.coli.uni-saarland.de/projects/salsa/shal/index.php?nav=download) (FrameNet 1.3 annotation / Collins parser)
@@ -34,32 +66,27 @@ For researchers interested in investigating shallow semantic parsing, our system

  ## Origin

- The original version of Shalmaneser was written by Sebastian Padó, Katrin Erk and others during their work in the SALSA Project.
+ The original version of Shalmaneser was written by Sebastian Padó, Katrin Erk, Alexander Koller, Ines Rehbein, Aljoscha Burchardt and others during their work in the SALSA Project.

  You can find original versions of Shalmaneser up to ``1.1`` on the [SALSA](http://www.coli.uni-saarland.de/projects/salsa/shal/) project page.

  ## Publications on Shalmaneser

  - K. Erk and S. Padó: Shalmaneser - a flexible toolbox for semantic role assignment. Proceedings of LREC 2006, Genoa, Italy. [Click here for details](http://www.nlpado.de/~sebastian/pub/papers/lrec06_erk.pdf).
+
  - TODO: add other works

  ## Documentation

- The project documentation can be found in our [doc](https://github.com/arbox/shalmaneser/blob/1.2/doc/index.md) folder.
+ The project documentation can be found in our [doc](https://github.com/arbox/shalmaneser/blob/master/doc/index.md) folder.

  ## Development

- We are working now on two branches:
-
- - ``dev`` - our development branch incorporating actual changes, for now pointing to ``1.2``;
-
- - ``1.2`` - intermediate target;
-
- - ``2.0`` - final target.
+ We are working now only on the `master` branch. For different intermediate versions see corresponding tags.

  ## Installation

- See the installation instructions in the [doc](https://github.com/arbox/shalmaneser/blob/1.2/doc/index.md#installation) folder.
+ See the installation instructions in the [doc](https://github.com/arbox/shalmaneser/blob/master/doc/index.md#installation) folder.

  ### Tokenizers

@@ -75,7 +102,7 @@ See the installation instructions in the [doc](https://github.com/arbox/shalmane

  ### Parsers

- - [BerkeleyParser](https://code.google.com/p/berkeleyparser/downloads/list)
+ - [BerkeleyParser](https://github.com/slavpetrov/berkeleyparser)
  - [Stanford Parser](http://nlp.stanford.edu/software/lex-parser.shtml)
  - [Collins Parser](http://www.cs.columbia.edu/~mcollins/code.html)

@@ -86,8 +113,10 @@ See the installation instructions in the [doc](https://github.com/arbox/shalmane

  ## License

- See the `LICENSE` file.
+ Shalmaneser is released under the `GPL v. 2.0` license as of the initial authors.
+
+ For a local copy of the full license text see the [LICENSE](LICENSE.md) file.

  ## Contributing

- See the `CONTRIBUTING` file.
+ Feel free to contact me via Github. Open an issue if you see problems or need help.
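The badge links in the README now point to the split component gems (shalmaneser-prep, shalmaneser-fred, shalmaneser-rosy). A minimal Gemfile sketch for pulling in the prerelease covered by this diff; declaring only shalmaneser-fred is an assumption, see the project documentation for the full setup.

```ruby
# Gemfile — minimal sketch, not the project's documented setup.
source 'https://rubygems.org'

# The prerelease version shown in this diff; prereleases must be requested explicitly.
gem 'shalmaneser-fred', '1.2.rc5'
```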
data/bin/fred CHANGED
@@ -10,7 +10,12 @@
  require 'fred/opt_parser'
  require 'fred/fred'

- options = Fred::OptParser.parse(ARGV)
+ begin
+   options = ::Shalmaneser::Fred::OptParser.parse(ARGV)

- fred = Fred::Fred.new(options)
- fred.assign
+   fred = ::Shalmaneser::Fred::Fred.new(options)
+   fred.assign
+ rescue => e
+   $stderr.puts 'Fred cannot serve you!'
+   $stderr.puts e.message, e.backtrace
+ end
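The executable now refers to the classes under the `::Shalmaneser::Fred` namespace and wraps the run in a `begin`/`rescue`. The same two calls can be driven from your own script; a sketch mirroring bin/fred, where the argument array is a placeholder for whatever options your Fred experiment expects.

```ruby
#!/usr/bin/env ruby
# Sketch of invoking Fred programmatically, mirroring the rescoped entry point above.
require 'fred/opt_parser'
require 'fred/fred'

argv = ARGV # placeholder: the options you would normally pass to bin/fred

begin
  options = ::Shalmaneser::Fred::OptParser.parse(argv)
  ::Shalmaneser::Fred::Fred.new(options).assign
rescue => e
  $stderr.puts 'Fred cannot serve you!'
  $stderr.puts e.message, e.backtrace
end
```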
data/lib/fred/FredConventions.rb CHANGED
@@ -4,229 +4,230 @@
  # several small things that should be uniform
  # throughout the system

- require "common/ruby_class_extensions"
+ require 'monkey_patching/file.rb'
+ require 'fred/answer_key_access'

- require "common/EnduserMode"
- class Object
+ module Shalmaneser
+ module Fred

- ###
- # joining and breaking up senses
- def fred_join_senses(senses)
- return senses.sort().join("++")
- end
-
- def fred_split_sense(joined_senses)
- return joined_senses.split("++")
- end
+ module_function

- ###
- # fred_dirname
- #
- # constructs a directory name:
- # fred data directory / experiment ID / maindir / subdir
- #
- # if is_existing == existing, the directory is checked for existence,
- # if is_existing == new, it is created if necessary
- #
- # returns: a string
- def fred_dirname(exp, # FredConfigData object
- maindir, # string: main part of directory name
- subdir, # string: subpart of directory name
- is_existing = "existing") # string: "existing" or "new", default: existing
-
- case is_existing
- when "existing"
- return File.existing_dir(exp.get("fred_directory"),
- exp.get("experiment_ID"),
- maindir,
- subdir)
- when "new"
- return File.new_dir(exp.get("fred_directory"),
- exp.get("experiment_ID"),
- maindir,
- subdir)
- else
- raise "Shouldn't be here: #{is_existing}"
- end
- end
+ def determine_training_senses(lemma, exp, lemmas_and_senses_obj, split_id)
+ if split_id
+ # oh no, we're splitting the dataset into random training and test portions.
+ # this means that we actually have to look into the training part of the data to
+ # determine the number of training senses

- ####
- # filenames for feature files
- def fred_feature_filename(lemma, sense = nil,
- do_binary = false)
- if do_binary
- return "fred.features.#{lemma}.SENSE.#{sense}"
- else
- return "fred.features.#{lemma}"
- end
- end
+ senses_hash = {}

- ####
- # filenames for split files
- def fred_split_filename(lemma)
- return "fred.split.#{lemma}"
- end
+ reader = AnswerKeyAccess.new(exp, "train", lemma, "r", split_id, "train")
+ reader.each do |_lemma, _pos, _ids, _sids, gold_senses, _transf_gold_senses|
+ gold_senses.each { |s| senses_hash[s] = true }
+ end

- ###
- # deconstruct split filename
- # returns: lemma
- def deconstruct_fred_split_filename(filename)
- basename = File.basename(filename)
- if basename =~ /^fred\.split\.(.*)/
- return $1
- else
- return nil
- end
- end
+ return senses_hash.keys

- ###
- # deconstruct feature file name
- # returns: hash with keys
- # "lemma"
- # "sense
- def deconstruct_fred_feature_filename(filename)
-
- basename = File.basename(filename)
- retv = Hash.new()
- # binary:
- # fred.features.#{lemma}.SENSE.#{sense}
- if basename =~ /^fred\.features\.(.*)\.SENSE\.(.*)$/
- retv["lemma"] = $1
- retv["sense"] = $2
- elsif basename =~ /^fred\.features\.(.*)/
- # fred.features.#{lemma}
- retv["lemma"] = $1
-
- else
- # complete mismatch
- return nil
+ else
+ # we're using separate test data.
+ # so we can just look up the number of training senses
+ # in the lemmas_and_senses object
+ senses = lemmas_and_senses_obj.get_senses(lemma)
+ if senses
+ return senses
+ else
+ return []
+ end
+ end
  end

- return retv
- end

- ####
- # filename for answer key files
- def fred_answerkey_filename(lemma)
- return "fred.answerkey.#{lemma}"
- end
+ ###
+ # joining and breaking up senses
+ # @note Used only in FredFeatures.
+ def fred_join_senses(senses)
+ senses.sort.join("++")
+ end

- ###
- # classifier directory
- def fred_classifier_directory(exp, # FredConfigData object
- splitID = nil) # string or nil
+ # @note Used only in FredEval.
+ def fred_split_sense(joined_senses)
+ joined_senses.split("++")
+ end

- if exp.get("classifier_dir")
- # user-specified classifier directory
+ ###
+ # fred_dirname
+ #
+ # @note Used on multiple positions.
+ # constructs a directory name:
+ # fred data directory / experiment ID / maindir / subdir
+ #
+ # if is_existing == existing, the directory is checked for existence,
+ # if is_existing == new, it is created if necessary
+ #
+ # @return [String]
+ def fred_dirname(exp, # FredConfigData object
+ maindir, # string: main part of directory name
+ subdir, # string: subpart of directory name
+ is_existing = "existing") # string: "existing" or "new", default: existing
+
+ case is_existing
+ when "existing"
+ return File.existing_dir(exp.get("fred_directory"),
+ exp.get("experiment_ID"),
+ maindir,
+ subdir)
+ when "new"
+ return File.new_dir(exp.get("fred_directory"),
+ exp.get("experiment_ID"),
+ maindir,
+ subdir)
+ else
+ raise "Shouldn't be here: #{is_existing}"
+ end
+ end

- if splitID
- return File.new_dir(exp.get("classifier_dir"), splitID)
+ ####
+ # filenames for feature files
+ # @note Used on multiple points.
+ def fred_feature_filename(lemma, sense = nil,
+ do_binary = false)
+ if do_binary
+ return "fred.features.#{lemma}.SENSE.#{sense}"
  else
- return File.new_dir(exp.get("classifier_dir"))
+ return "fred.features.#{lemma}"
  end
+ end

- else
- # my classifier directory
- if splitID
- return fred_dirname(exp, "classifiers", splitID, "new")
+ ###
+ # deconstruct split filename
+ # returns: lemma
+ # @note Not used anywhere.
+ def deconstruct_fred_split_filename(filename)
+ basename = File.basename(filename)
+ if basename =~ /^fred\.split\.(.*)/
+ return $1
  else
- return fred_dirname(exp, "classifiers", "all", "new")
+ return nil
  end
  end
- end

- ###
- # classifier file
- def fred_classifier_filename(classifier, lemma, sense=nil)
- if sense
- return "fred.classif.#{classifier}.LEMMA.#{lemma}.SENSE.#{sense}"
- else
- return "fred.classif.#{classifier}.LEMMA.#{lemma}"
+ ###
+ # deconstruct feature file name
+ # returns: hash with keys
+ # "lemma"
+ # "sense
+ # @note Used only in FredFeatures.
+ def deconstruct_fred_feature_filename(filename)
+ basename = File.basename(filename)
+ retv = {}
+
+ # binary:
+ # fred.features.#{lemma}.SENSE.#{sense}
+ if basename =~ /^fred\.features\.(.*)\.SENSE\.(.*)$/
+ retv["lemma"] = $1
+ retv["sense"] = $2
+ elsif basename =~ /^fred\.features\.(.*)/
+ # fred.features.#{lemma}
+ retv["lemma"] = $1
+
+ else
+ # complete mismatch
+ return nil
+ end
+
+ return retv
  end
- end

- def deconstruct_fred_classifier_filename(filename)
- retv = Hash.new()
- if filename =~ /^fred\.classif\.(.*)\.LEMMA\.(.*)\.SENSE\.(.*)$/
- retv["lemma"] = $2
- retv["sense"] = $3
- elsif filename =~ /^fred\.classif\.(.*)\.LEMMA\.(.*)$/
- retv["lemma"] = $2
+ ####
+ # filename for answer key files
+ # @note Used only in FredFeatures.
+ def fred_answerkey_filename(lemma)
+ return "fred.answerkey.#{lemma}"
  end
- return retv
- end

- ###
- # result file
- def fred_result_filename(lemma)
- return "fred.result.#{lemma.gsub(/\./, "_")}"
- end
+ ###
+ # classifier directory
+ # @note Used on multiple points.
+ def fred_classifier_directory(exp, # FredConfigData object
+ splitID = nil) # string or nil

- ##########
- # lemma and POS: combine into string separated by
- # a separator character
- #
- # fred_lemmapos_combine: take two strings, return combined string
- # if POS is nil, returns lemma<separator character>
- # fred_lemmapos_separate: take one string, return two strings
- # if no POS could be retrieved, returns nil as POS and the whole string as lemma
- def fred_lemmapos_combine(lemma, # string
- pos) # string
- return lemma.to_s + "." + pos.to_s.gsub(/\./, "DOT")
- end
+ if exp.get("classifier_dir")
+ # user-specified classifier directory

- ###
- def fred_lemmapos_separate(lemmapos) # string
- pieces = lemmapos.split(".")
- if pieces.length() > 1
- return [ pieces[0..-2].join("."), pieces[-1] ]
- else
- # no POS found, treat all of lemmapos as lemma
- return [ lemmapos, nil ]
- end
- end
- end
+ if splitID
+ return File.new_dir(exp.get("classifier_dir"), splitID)
+ else
+ return File.new_dir(exp.get("classifier_dir"))
+ end

- ########################################
- # given a SynNode object representing a terminal,
- # return:
- # - the word
- # - the lemma
- # - the part of speech
- # - the named entity (if any)
- #
- # as a tuple
- #
- # WARNING: word and lemma are turned to lowercase
- module WordLemmaPosNe
- def word_lemma_pos_ne(syn_obj, # SynNode object
- i) # SynInterpreter class
- unless syn_obj.is_terminal?
- $stderr.puts "Featurization warning: unexpectedly received non-terminal"
- return [ nil, nil, nil, nil ]
+ else
+ # my classifier directory
+ if splitID
+ return fred_dirname(exp, "classifiers", splitID, "new")
+ else
+ return fred_dirname(exp, "classifiers", "all", "new")
+ end
  end
+ end

- word = syn_obj.word()
- if word
- word.downcase!
+ ###
+ # classifier file
+ # @note Used on multiple points.
+ def fred_classifier_filename(classifier, lemma, sense = nil)
+ if sense
+ return "fred.classif.#{classifier}.LEMMA.#{lemma}.SENSE.#{sense}"
+ else
+ return "fred.classif.#{classifier}.LEMMA.#{lemma}"
  end
+ end

- lemma = i.lemma_backoff(syn_obj)
- if lemma and SalsaTigerXMLHelper.unescape(lemma) == "<unknown>"
- lemma = nil
- end
- if lemma
- lemma.downcase!
+ # @note Used only in Baseline.
+ def deconstruct_fred_classifier_filename(filename)
+ retv = {}
+
+ if filename =~ /^fred\.classif\.(.*)\.LEMMA\.(.*)\.SENSE\.(.*)$/
+ retv["lemma"] = $2
+ retv["sense"] = $3
+ elsif filename =~ /^fred\.classif\.(.*)\.LEMMA\.(.*)$/
+ retv["lemma"] = $2
  end

- pos = syn_obj.part_of_speech()
+ retv
+ end

- ne = syn_obj.get_attribute("ne")
- unless ne
- ne = syn_obj.get_attribute("headof_ne")
- end
+ ###
+ # result file
+ # @note Used on multiple points.
+ def fred_result_filename(lemma)
+ "fred.result.#{lemma.gsub(/\./, "_")}"
+ end

- return [word, lemma, pos, ne]
+ ##########
+ # lemma and POS: combine into string separated by
+ # a separator character
+ #
+ # fred_lemmapos_combine: take two strings, return combined string
+ # if POS is nil, returns lemma<separator character>
+ # fred_lemmapos_separate: take one string, return two strings
+ # if no POS could be retrieved, returns nil as POS and the whole string as lemma
+ # @param lemma [String]
+ # @param pos [String]
+ # @note Used on multiple points.
+ def fred_lemmapos_combine(lemma, pos)
+ lemma.to_s + "." + pos.to_s.gsub(/\./, "DOT")
  end
- end

+ ###
+ # @param lemmapos [String]
+ # @note Used only in FredDetermineTargets.
+ def fred_lemmapos_separate(lemmapos)
+ pieces = lemmapos.split(".")
+
+ if pieces.length > 1
+ return [pieces[0..-2].join("."), pieces[-1]]
+ else
+ # no POS found, treat all of lemmapos as lemma
+ return [lemmapos, nil]
+ end
+ end
+ end
+ end
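With the helpers moved from a reopened `Object` into module functions on `Shalmaneser::Fred`, callers now qualify them explicitly instead of inheriting them everywhere. A small sketch of the string conventions, with return values that follow from the implementations above; the lemma, the sense labels, and the `maxent` classifier name are made-up examples, and the require path assumes the gem's `lib/` layout.

```ruby
require 'fred/FredConventions'

# Sense sets are joined with "++" after sorting, and split on the same separator.
Shalmaneser::Fred.fred_join_senses(%w[run.v.2 run.v.1])  # => "run.v.1++run.v.2"
Shalmaneser::Fred.fred_split_sense('run.v.1++run.v.2')   # => ["run.v.1", "run.v.2"]

# Lemma and POS are combined with a "." separator and split apart again.
Shalmaneser::Fred.fred_lemmapos_combine('run', 'v')      # => "run.v"
Shalmaneser::Fred.fred_lemmapos_separate('run.v')        # => ["run", "v"]

# Filename conventions for feature and classifier files.
Shalmaneser::Fred.fred_feature_filename('run.v')         # => "fred.features.run.v"
Shalmaneser::Fred.fred_classifier_filename('maxent', 'run.v')
# => "fred.classif.maxent.LEMMA.run.v"
```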