shalmaneser 1.2.0.rc3 → 1.2.0.rc4
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +26 -7
- data/bin/fred +2 -4
- data/doc/exp_files.md +6 -5
- data/lib/common/{ConfigData.rb → config_data.rb} +46 -270
- data/lib/common/config_format_element.rb +220 -0
- data/lib/common/prep_config_data.rb +62 -0
- data/lib/common/{frprep_helper.rb → prep_helper.rb} +0 -0
- data/lib/{common/DBInterface.rb → db/db_interface.rb} +2 -2
- data/lib/{rosy/DBMySQL.rb → db/db_mysql.rb} +1 -2
- data/lib/{rosy/DBSQLite.rb → db/db_sqlite.rb} +1 -1
- data/lib/{rosy/DBTable.rb → db/db_table.rb} +1 -1
- data/lib/{rosy/DBWrapper.rb → db/db_wrapper.rb} +0 -0
- data/lib/{common/SQLQuery.rb → db/sql_query.rb} +0 -0
- data/lib/fred/FredBOWContext.rb +8 -6
- data/lib/fred/FredDetermineTargets.rb +1 -1
- data/lib/fred/FredEval.rb +1 -1
- data/lib/fred/FredFeaturize.rb +22 -16
- data/lib/fred/FredTest.rb +0 -1
- data/lib/fred/fred.rb +2 -0
- data/lib/fred/{FredConfigData.rb → fred_config_data.rb} +70 -67
- data/lib/fred/opt_parser.rb +1 -1
- data/lib/frprep/frprep.rb +1 -1
- data/lib/frprep/interfaces/berkeley_interface.rb +7 -9
- data/lib/frprep/opt_parser.rb +1 -1
- data/lib/rosy/ExternalConfigData.rb +1 -1
- data/lib/rosy/RosyEval.rb +1 -1
- data/lib/rosy/RosyFeaturize.rb +21 -20
- data/lib/rosy/RosyInspect.rb +1 -1
- data/lib/rosy/RosyPruning.rb +1 -1
- data/lib/rosy/RosyServices.rb +1 -1
- data/lib/rosy/RosySplit.rb +1 -1
- data/lib/rosy/RosyTest.rb +23 -20
- data/lib/rosy/RosyTrain.rb +15 -13
- data/lib/rosy/RosyTrainingTestTable.rb +2 -1
- data/lib/rosy/View.rb +1 -1
- data/lib/rosy/opt_parser.rb +1 -1
- data/lib/rosy/rosy.rb +1 -1
- data/lib/rosy/rosy_config_data.rb +121 -0
- data/lib/shalmaneser/opt_parser.rb +32 -2
- data/lib/shalmaneser/version.rb +1 -1
- metadata +23 -114
- checksums.yaml.gz.sig +0 -0
- data.tar.gz.sig +0 -0
- data/lib/common/FrPrepConfigData.rb +0 -66
- data/lib/rosy/RosyConfigData.rb +0 -115
- metadata.gz.sig +0 -0
data/lib/fred/FredTest.rb
CHANGED
data/lib/fred/fred.rb
CHANGED
@@ -4,7 +4,7 @@
|
|
4
4
|
# Frame disambiguation system:
|
5
5
|
# access to a configuration and experiment description file
|
6
6
|
|
7
|
-
require "common/
|
7
|
+
require "common/config_data"
|
8
8
|
|
9
9
|
##############################
|
10
10
|
# Class FredConfigData
|
@@ -13,71 +13,73 @@ require "common/ConfigData"
|
|
13
13
|
# sets variable names appropriate to WSD task
|
14
14
|
|
15
15
|
class FredConfigData < ConfigData
|
16
|
-
|
17
|
-
|
18
|
-
#
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
16
|
+
CONFIG_DEFS = {
|
17
|
+
"experiment_ID" => "string", # experiment ID
|
18
|
+
"enduser_mode" => "bool", # work in enduser mode? (disallowing many things)
|
19
|
+
|
20
|
+
"preproc_descr_file_train" => "string", # path to preprocessing files
|
21
|
+
"preproc_descr_file_test" => "string",
|
22
|
+
"directory_output" => "string", # path to Salsa/Tiger XML output directory
|
23
|
+
|
24
|
+
"verbose" => "bool" , # print diagnostic messages?
|
25
|
+
"apply_to_all_known_targets" => "bool", # apply to all known targets rather than the ones with a frame?
|
26
|
+
|
27
|
+
"fred_directory" => "string",# directory for internal info
|
28
|
+
"classifier_dir" => "string", # write classifiers here
|
29
|
+
|
30
|
+
"classifier" => "list", # classifiers
|
31
|
+
|
32
|
+
"dbtype" => "string", # "mysql" or "sqlite"
|
33
|
+
|
34
|
+
"host" => "string", # DB access: sqlite only
|
35
|
+
"user" => "string",
|
36
|
+
"passwd" => "string",
|
37
|
+
"dbname" => "string",
|
38
|
+
|
39
|
+
# featurization info
|
40
|
+
"feature" => "list", # which features to use for the classifier?
|
41
|
+
"binary_classifiers" => "bool",# make binary rather than n-ary classifiers?
|
42
|
+
"negsense" => "string", # binary classifier: negative sense is..?
|
43
|
+
"numerical_features" => "string", # do what with numerical features?
|
44
|
+
|
45
|
+
# what to do with items that have multiple senses?
|
46
|
+
# 'binarize': binary classifiers, and consider positive
|
47
|
+
# if the sense is among the gold senses
|
48
|
+
# 'join' : make one joint sense
|
49
|
+
# 'repeat' : make multiple occurrences of the item, one sense per occ
|
50
|
+
# 'keep' : keep as separate labels
|
51
|
+
#
|
52
|
+
# multilabel: consider as assigned all labels
|
53
|
+
# above a certain confidence threshold?
|
54
|
+
"handle_multilabel" => "string",
|
55
|
+
"assignment_confidence_threshold" => "float",
|
56
|
+
|
57
|
+
# single-sentence context?
|
58
|
+
"single_sent_context" => "bool",
|
59
|
+
|
60
|
+
# noncontiguous input? then we need access to a larger corpus
|
61
|
+
"noncontiguous_input" => "bool",
|
62
|
+
"larger_corpus_dir" => "string",
|
63
|
+
"larger_corpus_format" => "string",
|
64
|
+
"larger_corpus_encoding" => "string",
|
65
|
+
# Imported from PrepConfigData
|
66
|
+
'do_postag' => 'bool',
|
67
|
+
'do_lemmatize' => 'bool',
|
68
|
+
'do_parse' => 'bool',
|
69
|
+
'pos_tagger' => 'string',
|
70
|
+
'lemmatizer' => 'string',
|
71
|
+
'parser' => 'string',
|
72
|
+
'directory_preprocessed' => 'string',
|
73
|
+
'language' => 'string'
|
74
|
+
}
|
42
75
|
|
43
|
-
|
44
|
-
"feature" => "list", # which features to use for the classifier?
|
45
|
-
"binary_classifiers" => "bool",# make binary rather than n-ary classifiers?
|
46
|
-
"negsense" => "string", # binary classifier: negative sense is..?
|
47
|
-
"numerical_features" => "string", # do what with numerical features?
|
48
|
-
|
49
|
-
# what to do with items that have multiple senses?
|
50
|
-
# 'binarize': binary classifiers, and consider positive
|
51
|
-
# if the sense is among the gold senses
|
52
|
-
# 'join' : make one joint sense
|
53
|
-
# 'repeat' : make multiple occurrences of the item, one sense per occ
|
54
|
-
# 'keep' : keep as separate labels
|
55
|
-
#
|
56
|
-
# multilabel: consider as assigned all labels
|
57
|
-
# above a certain confidence threshold?
|
58
|
-
"handle_multilabel" => "string",
|
59
|
-
"assignment_confidence_threshold" => "float",
|
60
|
-
|
61
|
-
# single-sentence context?
|
62
|
-
"single_sent_context" => "bool",
|
76
|
+
def initialize(filename)
|
63
77
|
|
64
|
-
|
65
|
-
"noncontiguous_input" => "bool",
|
66
|
-
"larger_corpus_dir" => "string",
|
67
|
-
"larger_corpus_format" => "string",
|
68
|
-
"larger_corpus_encoding" => "string"
|
69
|
-
},
|
70
|
-
[ # variables
|
71
|
-
"train",
|
72
|
-
"exp_ID"
|
73
|
-
]
|
74
|
-
)
|
78
|
+
super(filename, CONFIG_DEFS, ["train", "exp_ID"])
|
75
79
|
|
76
80
|
# set access functions for list features
|
77
|
-
set_list_feature_access("classifier",
|
78
|
-
|
79
|
-
set_list_feature_access("feature",
|
80
|
-
method("access_feature"))
|
81
|
+
set_list_feature_access("classifier", method("access_classifier"))
|
82
|
+
set_list_feature_access("feature", method("access_feature"))
|
81
83
|
end
|
82
84
|
|
83
85
|
###
|
@@ -165,14 +167,15 @@ class FredConfigData < ConfigData
|
|
165
167
|
#
|
166
168
|
# returns: a list of pairs [feature_name(string), options(array:string)]
|
167
169
|
# of defined features
|
168
|
-
|
169
|
-
|
170
|
+
# @param val_list [Array] array:array:string: list of tuples defined
|
171
|
+
# in config file for feature 'classifier'
|
172
|
+
def access_classifier(val_list)
|
170
173
|
if val_list.nil?
|
171
|
-
|
174
|
+
[]
|
172
175
|
else
|
173
|
-
|
176
|
+
val_list.map do |cl_descr_tuple|
|
174
177
|
[cl_descr_tuple.first, cl_descr_tuple[1..-1]]
|
175
|
-
|
178
|
+
end
|
176
179
|
end
|
177
180
|
end
|
178
181
|
|
data/lib/fred/opt_parser.rb
CHANGED
data/lib/frprep/frprep.rb
CHANGED
@@ -63,12 +63,9 @@ class BerkeleyInterface < SynInterfaceSTXML
|
|
63
63
|
|
64
64
|
parser = ENV['SHALM_BERKELEY_BIN'] || 'berkeleyParser.jar'
|
65
65
|
grammar = ENV['SHALM_BERKELEY_MODEL'] || 'grammar.gr'
|
66
|
+
options = ENV['SHALM_BERKELEY_OPTIONS']
|
66
67
|
|
67
|
-
|
68
|
-
|
69
|
-
#berkeley_prog = "java -d64 -Xmx10000m -jar #{@program_path}berkeley-parser.jar -gr #{@program_path}gerNegra.01.utf8 "
|
70
|
-
|
71
|
-
berkeley_prog = "java -jar #{@program_path}#{parser} -gr #{@program_path}#{grammar}"
|
68
|
+
berkeley_prog = "java -jar #{@program_path}#{parser} #{options} -gr #{@program_path}#{grammar}"
|
72
69
|
|
73
70
|
Dir[in_dir + "*" + @insuffix].each do |inputfilename|
|
74
71
|
|
@@ -139,10 +136,10 @@ class BerkeleyInterface < SynInterfaceSTXML
|
|
139
136
|
# - a parse beginning with <( (>, <( (TOP>, <( (VROOT> etc.
|
140
137
|
# TOP - Negra Grammars
|
141
138
|
# VROOT - Tiger Grammars
|
142
|
-
#
|
139
|
+
# PSEUDO - Original BP Grammars
|
143
140
|
# ROOT - some english grammars
|
144
141
|
# empty identifiers for older Tiger grammars
|
145
|
-
if line.nil? or line
|
142
|
+
if line.nil? or line=~/^(\( *)?\((PSEUDO|TOP|ROOT|VROOT)? / or line=~/^\(\(\)/
|
146
143
|
break
|
147
144
|
end
|
148
145
|
sentid +=1
|
@@ -157,8 +154,9 @@ class BerkeleyInterface < SynInterfaceSTXML
|
|
157
154
|
# Insert a top node <VROOT> if missing.
|
158
155
|
# Some grammars trained on older Tiger Versions
|
159
156
|
# expose this problem.
|
160
|
-
|
161
|
-
|
157
|
+
#STDERR.puts "@@@1 <#{line}>"
|
158
|
+
line.sub!(/^(\(\s+\()(\s+)/, '\1VROOT\2')
|
159
|
+
#STDERR.puts "@@@2 <#{line}>"
|
162
160
|
# berkeley parser output: remove brackets /(.*)/
|
163
161
|
# Remove leading and trailing top level brackets.
|
164
162
|
line.sub!(/^\( */, '')
|
data/lib/frprep/opt_parser.rb
CHANGED
data/lib/rosy/RosyEval.rb
CHANGED
@@ -19,7 +19,7 @@ require "rosy/RosyTask"
|
|
19
19
|
require "rosy/RosyPruning"
|
20
20
|
|
21
21
|
# Frprep packages
|
22
|
-
require "common/
|
22
|
+
require "common/prep_config_data"
|
23
23
|
|
24
24
|
#######################################################################
|
25
25
|
# This class is a subclass of the general evaluation class
|
data/lib/rosy/RosyFeaturize.rb
CHANGED
@@ -9,13 +9,13 @@ require "common/SynInterfaces"
|
|
9
9
|
require "common/ruby_class_extensions"
|
10
10
|
|
11
11
|
# Frprep packages
|
12
|
-
require "common/
|
12
|
+
#require "common/prep_config_data"
|
13
13
|
|
14
14
|
# Rosy packages
|
15
15
|
require "rosy/FailedParses"
|
16
16
|
require "rosy/FeatureInfo"
|
17
17
|
require "rosy/InputData"
|
18
|
-
require "rosy/
|
18
|
+
require "rosy/rosy_config_data"
|
19
19
|
require "common/RosyConventions"
|
20
20
|
require "rosy/RosySplit"
|
21
21
|
require "rosy/RosyTask"
|
@@ -81,24 +81,25 @@ class RosyFeaturize < RosyTask
|
|
81
81
|
|
82
82
|
##
|
83
83
|
# add preprocessing information to the experiment file object
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
84
|
+
# @note AB: Commented out due to separation of PrepConfigData.
|
85
|
+
# if @dataset
|
86
|
+
# preproc_parameter = "preproc_descr_file_" + @dataset
|
87
|
+
# else
|
88
|
+
# # split data
|
89
|
+
# preproc_parameter = "preproc_descr_file_train"
|
90
|
+
# end
|
91
|
+
# preproc_expname = @exp.get(preproc_parameter)
|
92
|
+
# if not(preproc_expname)
|
93
|
+
# $stderr.puts "Please set the name of the preprocessing exp. file name"
|
94
|
+
# $stderr.puts "in the experiment file, parameter #{preproc_parameter}"
|
95
|
+
# exit 1
|
96
|
+
# elsif not(File.readable?(preproc_expname))
|
97
|
+
# $stderr.puts "Error in the experiment file:"
|
98
|
+
# $stderr.puts "Parameter #{preproc_parameter} has to be a readable file."
|
99
|
+
# exit 1
|
100
|
+
# end
|
101
|
+
# preproc_exp = FrPrepConfigData.new(preproc_expname)
|
102
|
+
# @exp.adjoin(preproc_exp)
|
102
103
|
|
103
104
|
###
|
104
105
|
# find appropriate class for interpreting syntactic structures
|
data/lib/rosy/RosyInspect.rb
CHANGED
data/lib/rosy/RosyPruning.rb
CHANGED
data/lib/rosy/RosyServices.rb
CHANGED
data/lib/rosy/RosySplit.rb
CHANGED
data/lib/rosy/RosyTest.rb
CHANGED
@@ -24,7 +24,7 @@ require "rosy/RosyTrainingTestTable"
|
|
24
24
|
require "rosy/View"
|
25
25
|
|
26
26
|
# Frprep packages
|
27
|
-
require "common/
|
27
|
+
#require "common/prep_config_data" # AB: why was this required here?
|
28
28
|
|
29
29
|
##########################################################################
|
30
30
|
# classifier combination class
|
@@ -156,25 +156,28 @@ class RosyTest < RosyTask
|
|
156
156
|
|
157
157
|
##
|
158
158
|
# add preprocessing information to the experiment file object
|
159
|
-
|
160
|
-
|
161
|
-
|
162
|
-
|
163
|
-
|
164
|
-
|
165
|
-
|
166
|
-
|
167
|
-
|
168
|
-
|
169
|
-
|
170
|
-
|
171
|
-
|
172
|
-
|
173
|
-
|
174
|
-
|
175
|
-
|
176
|
-
|
177
|
-
|
159
|
+
# @note AB: Commented out due to separation of PrepConfigData:
|
160
|
+
# information for SynInterfaces required.
|
161
|
+
# if @splitID
|
162
|
+
# # use split data
|
163
|
+
# preproc_param = "preproc_descr_file_train"
|
164
|
+
# else
|
165
|
+
# # use test data
|
166
|
+
# preproc_param = "preproc_descr_file_test"
|
167
|
+
# end
|
168
|
+
|
169
|
+
# preproc_expname = @exp.get(preproc_param)
|
170
|
+
# if not(preproc_expname)
|
171
|
+
# $stderr.puts "Please set the name of the preprocessing exp. file name"
|
172
|
+
# $stderr.puts "in the experiment file, parameter #{preproc_param}."
|
173
|
+
# exit 1
|
174
|
+
# elsif not(File.readable?(preproc_expname))
|
175
|
+
# $stderr.puts "Error in the experiment file:"
|
176
|
+
# $stderr.puts "Parameter #{preproc_param} has to be a readable file."
|
177
|
+
# exit 1
|
178
|
+
# end
|
179
|
+
# preproc_exp = FrPrepConfigData.new(preproc_expname)
|
180
|
+
# @exp.adjoin(preproc_exp)
|
178
181
|
|
179
182
|
# announce the task
|
180
183
|
$stderr.puts "---------"
|
data/lib/rosy/RosyTrain.rb
CHANGED
@@ -18,7 +18,7 @@ require "rosy/RosyPruning"
|
|
18
18
|
require "common/ML"
|
19
19
|
|
20
20
|
# Frprep packages
|
21
|
-
require "common/
|
21
|
+
#require "common/prep_config_data"
|
22
22
|
|
23
23
|
class RosyTrain < RosyTask
|
24
24
|
|
@@ -68,18 +68,20 @@ class RosyTrain < RosyTask
|
|
68
68
|
|
69
69
|
##
|
70
70
|
# add preprocessing information to the experiment file object
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
71
|
+
# @note AB: Commented out due to separation of PrepConfigData.
|
72
|
+
# No information seems to be required.
|
73
|
+
# preproc_expname = @exp.get("preproc_descr_file_train")
|
74
|
+
# if not(preproc_expname)
|
75
|
+
# $stderr.puts "Please set the name of the preprocessing exp. file name"
|
76
|
+
# $stderr.puts "in the experiment file, parameter preproc_descr_file_train."
|
77
|
+
# exit 1
|
78
|
+
# elsif not(File.readable?(preproc_expname))
|
79
|
+
# $stderr.puts "Error in the experiment file:"
|
80
|
+
# $stderr.puts "Parameter preproc_descr_file_train has to be a readable file."
|
81
|
+
# exit 1
|
82
|
+
# end
|
83
|
+
# preproc_exp = FrPrepConfigData.new(preproc_expname)
|
84
|
+
# @exp.adjoin(preproc_exp)
|
83
85
|
|
84
86
|
|
85
87
|
# get_lf returns: array of pairs [classifier_name, options[array]]
|