shalmaneser 1.2.0.rc3 → 1.2.0.rc4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +26 -7
- data/bin/fred +2 -4
- data/doc/exp_files.md +6 -5
- data/lib/common/{ConfigData.rb → config_data.rb} +46 -270
- data/lib/common/config_format_element.rb +220 -0
- data/lib/common/prep_config_data.rb +62 -0
- data/lib/common/{frprep_helper.rb → prep_helper.rb} +0 -0
- data/lib/{common/DBInterface.rb → db/db_interface.rb} +2 -2
- data/lib/{rosy/DBMySQL.rb → db/db_mysql.rb} +1 -2
- data/lib/{rosy/DBSQLite.rb → db/db_sqlite.rb} +1 -1
- data/lib/{rosy/DBTable.rb → db/db_table.rb} +1 -1
- data/lib/{rosy/DBWrapper.rb → db/db_wrapper.rb} +0 -0
- data/lib/{common/SQLQuery.rb → db/sql_query.rb} +0 -0
- data/lib/fred/FredBOWContext.rb +8 -6
- data/lib/fred/FredDetermineTargets.rb +1 -1
- data/lib/fred/FredEval.rb +1 -1
- data/lib/fred/FredFeaturize.rb +22 -16
- data/lib/fred/FredTest.rb +0 -1
- data/lib/fred/fred.rb +2 -0
- data/lib/fred/{FredConfigData.rb → fred_config_data.rb} +70 -67
- data/lib/fred/opt_parser.rb +1 -1
- data/lib/frprep/frprep.rb +1 -1
- data/lib/frprep/interfaces/berkeley_interface.rb +7 -9
- data/lib/frprep/opt_parser.rb +1 -1
- data/lib/rosy/ExternalConfigData.rb +1 -1
- data/lib/rosy/RosyEval.rb +1 -1
- data/lib/rosy/RosyFeaturize.rb +21 -20
- data/lib/rosy/RosyInspect.rb +1 -1
- data/lib/rosy/RosyPruning.rb +1 -1
- data/lib/rosy/RosyServices.rb +1 -1
- data/lib/rosy/RosySplit.rb +1 -1
- data/lib/rosy/RosyTest.rb +23 -20
- data/lib/rosy/RosyTrain.rb +15 -13
- data/lib/rosy/RosyTrainingTestTable.rb +2 -1
- data/lib/rosy/View.rb +1 -1
- data/lib/rosy/opt_parser.rb +1 -1
- data/lib/rosy/rosy.rb +1 -1
- data/lib/rosy/rosy_config_data.rb +121 -0
- data/lib/shalmaneser/opt_parser.rb +32 -2
- data/lib/shalmaneser/version.rb +1 -1
- metadata +23 -114
- checksums.yaml.gz.sig +0 -0
- data.tar.gz.sig +0 -0
- data/lib/common/FrPrepConfigData.rb +0 -66
- data/lib/rosy/RosyConfigData.rb +0 -115
- metadata.gz.sig +0 -0
data/lib/fred/FredTest.rb
CHANGED
data/lib/fred/fred.rb
CHANGED
@@ -4,7 +4,7 @@
|
|
4
4
|
# Frame disambiguation system:
|
5
5
|
# access to a configuration and experiment description file
|
6
6
|
|
7
|
-
require "common/
|
7
|
+
require "common/config_data"
|
8
8
|
|
9
9
|
##############################
|
10
10
|
# Class FredConfigData
|
@@ -13,71 +13,73 @@ require "common/ConfigData"
|
|
13
13
|
# sets variable names appropriate to WSD task
|
14
14
|
|
15
15
|
class FredConfigData < ConfigData
|
16
|
-
|
17
|
-
|
18
|
-
#
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
16
|
+
CONFIG_DEFS = {
|
17
|
+
"experiment_ID" => "string", # experiment ID
|
18
|
+
"enduser_mode" => "bool", # work in enduser mode? (disallowing many things)
|
19
|
+
|
20
|
+
"preproc_descr_file_train" => "string", # path to preprocessing files
|
21
|
+
"preproc_descr_file_test" => "string",
|
22
|
+
"directory_output" => "string", # path to Salsa/Tiger XML output directory
|
23
|
+
|
24
|
+
"verbose" => "bool" , # print diagnostic messages?
|
25
|
+
"apply_to_all_known_targets" => "bool", # apply to all known targets rather than the ones with a frame?
|
26
|
+
|
27
|
+
"fred_directory" => "string",# directory for internal info
|
28
|
+
"classifier_dir" => "string", # write classifiers here
|
29
|
+
|
30
|
+
"classifier" => "list", # classifiers
|
31
|
+
|
32
|
+
"dbtype" => "string", # "mysql" or "sqlite"
|
33
|
+
|
34
|
+
"host" => "string", # DB access: sqlite only
|
35
|
+
"user" => "string",
|
36
|
+
"passwd" => "string",
|
37
|
+
"dbname" => "string",
|
38
|
+
|
39
|
+
# featurization info
|
40
|
+
"feature" => "list", # which features to use for the classifier?
|
41
|
+
"binary_classifiers" => "bool",# make binary rather than n-ary classifiers?
|
42
|
+
"negsense" => "string", # binary classifier: negative sense is..?
|
43
|
+
"numerical_features" => "string", # do what with numerical features?
|
44
|
+
|
45
|
+
# what to do with items that have multiple senses?
|
46
|
+
# 'binarize': binary classifiers, and consider positive
|
47
|
+
# if the sense is among the gold senses
|
48
|
+
# 'join' : make one joint sense
|
49
|
+
# 'repeat' : make multiple occurrences of the item, one sense per occ
|
50
|
+
# 'keep' : keep as separate labels
|
51
|
+
#
|
52
|
+
# multilabel: consider as assigned all labels
|
53
|
+
# above a certain confidence threshold?
|
54
|
+
"handle_multilabel" => "string",
|
55
|
+
"assignment_confidence_threshold" => "float",
|
56
|
+
|
57
|
+
# single-sentence context?
|
58
|
+
"single_sent_context" => "bool",
|
59
|
+
|
60
|
+
# noncontiguous input? then we need access to a larger corpus
|
61
|
+
"noncontiguous_input" => "bool",
|
62
|
+
"larger_corpus_dir" => "string",
|
63
|
+
"larger_corpus_format" => "string",
|
64
|
+
"larger_corpus_encoding" => "string",
|
65
|
+
# Imported from PrepConfigData
|
66
|
+
'do_postag' => 'bool',
|
67
|
+
'do_lemmatize' => 'bool',
|
68
|
+
'do_parse' => 'bool',
|
69
|
+
'pos_tagger' => 'string',
|
70
|
+
'lemmatizer' => 'string',
|
71
|
+
'parser' => 'string',
|
72
|
+
'directory_preprocessed' => 'string',
|
73
|
+
'language' => 'string'
|
74
|
+
}
|
42
75
|
|
43
|
-
|
44
|
-
"feature" => "list", # which features to use for the classifier?
|
45
|
-
"binary_classifiers" => "bool",# make binary rather than n-ary classifiers?
|
46
|
-
"negsense" => "string", # binary classifier: negative sense is..?
|
47
|
-
"numerical_features" => "string", # do what with numerical features?
|
48
|
-
|
49
|
-
# what to do with items that have multiple senses?
|
50
|
-
# 'binarize': binary classifiers, and consider positive
|
51
|
-
# if the sense is among the gold senses
|
52
|
-
# 'join' : make one joint sense
|
53
|
-
# 'repeat' : make multiple occurrences of the item, one sense per occ
|
54
|
-
# 'keep' : keep as separate labels
|
55
|
-
#
|
56
|
-
# multilabel: consider as assigned all labels
|
57
|
-
# above a certain confidence threshold?
|
58
|
-
"handle_multilabel" => "string",
|
59
|
-
"assignment_confidence_threshold" => "float",
|
60
|
-
|
61
|
-
# single-sentence context?
|
62
|
-
"single_sent_context" => "bool",
|
76
|
+
def initialize(filename)
|
63
77
|
|
64
|
-
|
65
|
-
"noncontiguous_input" => "bool",
|
66
|
-
"larger_corpus_dir" => "string",
|
67
|
-
"larger_corpus_format" => "string",
|
68
|
-
"larger_corpus_encoding" => "string"
|
69
|
-
},
|
70
|
-
[ # variables
|
71
|
-
"train",
|
72
|
-
"exp_ID"
|
73
|
-
]
|
74
|
-
)
|
78
|
+
super(filename, CONFIG_DEFS, ["train", "exp_ID"])
|
75
79
|
|
76
80
|
# set access functions for list features
|
77
|
-
set_list_feature_access("classifier",
|
78
|
-
|
79
|
-
set_list_feature_access("feature",
|
80
|
-
method("access_feature"))
|
81
|
+
set_list_feature_access("classifier", method("access_classifier"))
|
82
|
+
set_list_feature_access("feature", method("access_feature"))
|
81
83
|
end
|
82
84
|
|
83
85
|
###
|
@@ -165,14 +167,15 @@ class FredConfigData < ConfigData
|
|
165
167
|
#
|
166
168
|
# returns: a list of pairs [feature_name(string), options(array:string)]
|
167
169
|
# of defined features
|
168
|
-
|
169
|
-
|
170
|
+
# @param val_list [Array] array:array:string: list of tuples defined
|
171
|
+
# in config file for feature 'classifier'
|
172
|
+
def access_classifier(val_list)
|
170
173
|
if val_list.nil?
|
171
|
-
|
174
|
+
[]
|
172
175
|
else
|
173
|
-
|
176
|
+
val_list.map do |cl_descr_tuple|
|
174
177
|
[cl_descr_tuple.first, cl_descr_tuple[1..-1]]
|
175
|
-
|
178
|
+
end
|
176
179
|
end
|
177
180
|
end
|
178
181
|
|
data/lib/fred/opt_parser.rb
CHANGED
data/lib/frprep/frprep.rb
CHANGED
@@ -63,12 +63,9 @@ class BerkeleyInterface < SynInterfaceSTXML
|
|
63
63
|
|
64
64
|
parser = ENV['SHALM_BERKELEY_BIN'] || 'berkeleyParser.jar'
|
65
65
|
grammar = ENV['SHALM_BERKELEY_MODEL'] || 'grammar.gr'
|
66
|
+
options = ENV['SHALM_BERKELEY_OPTIONS']
|
66
67
|
|
67
|
-
|
68
|
-
|
69
|
-
#berkeley_prog = "java -d64 -Xmx10000m -jar #{@program_path}berkeley-parser.jar -gr #{@program_path}gerNegra.01.utf8 "
|
70
|
-
|
71
|
-
berkeley_prog = "java -jar #{@program_path}#{parser} -gr #{@program_path}#{grammar}"
|
68
|
+
berkeley_prog = "java -jar #{@program_path}#{parser} #{options} -gr #{@program_path}#{grammar}"
|
72
69
|
|
73
70
|
Dir[in_dir + "*" + @insuffix].each do |inputfilename|
|
74
71
|
|
@@ -139,10 +136,10 @@ class BerkeleyInterface < SynInterfaceSTXML
|
|
139
136
|
# - a parse beginning with <( (>, <( (TOP>, <( (VROOT> etc.
|
140
137
|
# TOP - Negra Grammars
|
141
138
|
# VROOT - Tiger Grammars
|
142
|
-
#
|
139
|
+
# PSEUDO - Original BP Grammars
|
143
140
|
# ROOT - some English grammars
|
144
141
|
# empty identifiers for older Tiger grammars
|
145
|
-
if line.nil? or line
|
142
|
+
if line.nil? or line=~/^(\( *)?\((PSEUDO|TOP|ROOT|VROOT)? / or line=~/^\(\(\)/
|
146
143
|
break
|
147
144
|
end
|
148
145
|
sentid +=1
|
@@ -157,8 +154,9 @@ class BerkeleyInterface < SynInterfaceSTXML
|
|
157
154
|
# Insert a top node <VROOT> if missing.
|
158
155
|
# Some grammars trained on older Tiger Versions
|
159
156
|
# expose this problem.
|
160
|
-
|
161
|
-
|
157
|
+
#STDERR.puts "@@@1 <#{line}>"
|
158
|
+
line.sub!(/^(\(\s+\()(\s+)/, '\1VROOT\2')
|
159
|
+
#STDERR.puts "@@@2 <#{line}>"
|
162
160
|
# berkeley parser output: remove brackets /(.*)/
|
163
161
|
# Remove leading and trailing top level brackets.
|
164
162
|
line.sub!(/^\( */, '')
|
data/lib/frprep/opt_parser.rb
CHANGED
data/lib/rosy/RosyEval.rb
CHANGED
@@ -19,7 +19,7 @@ require "rosy/RosyTask"
|
|
19
19
|
require "rosy/RosyPruning"
|
20
20
|
|
21
21
|
# Frprep packages
|
22
|
-
require "common/
|
22
|
+
require "common/prep_config_data"
|
23
23
|
|
24
24
|
#######################################################################
|
25
25
|
# This class is a subclass of the general evaluation class
|
data/lib/rosy/RosyFeaturize.rb
CHANGED
@@ -9,13 +9,13 @@ require "common/SynInterfaces"
|
|
9
9
|
require "common/ruby_class_extensions"
|
10
10
|
|
11
11
|
# Frprep packages
|
12
|
-
require "common/
|
12
|
+
#require "common/prep_config_data"
|
13
13
|
|
14
14
|
# Rosy packages
|
15
15
|
require "rosy/FailedParses"
|
16
16
|
require "rosy/FeatureInfo"
|
17
17
|
require "rosy/InputData"
|
18
|
-
require "rosy/
|
18
|
+
require "rosy/rosy_config_data"
|
19
19
|
require "common/RosyConventions"
|
20
20
|
require "rosy/RosySplit"
|
21
21
|
require "rosy/RosyTask"
|
@@ -81,24 +81,25 @@ class RosyFeaturize < RosyTask
|
|
81
81
|
|
82
82
|
##
|
83
83
|
# add preprocessing information to the experiment file object
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
84
|
+
# @note AB: Commented out due to separation of PrepConfigData.
|
85
|
+
# if @dataset
|
86
|
+
# preproc_parameter = "preproc_descr_file_" + @dataset
|
87
|
+
# else
|
88
|
+
# # split data
|
89
|
+
# preproc_parameter = "preproc_descr_file_train"
|
90
|
+
# end
|
91
|
+
# preproc_expname = @exp.get(preproc_parameter)
|
92
|
+
# if not(preproc_expname)
|
93
|
+
# $stderr.puts "Please set the name of the preprocessing exp. file name"
|
94
|
+
# $stderr.puts "in the experiment file, parameter #{preproc_parameter}"
|
95
|
+
# exit 1
|
96
|
+
# elsif not(File.readable?(preproc_expname))
|
97
|
+
# $stderr.puts "Error in the experiment file:"
|
98
|
+
# $stderr.puts "Parameter #{preproc_parameter} has to be a readable file."
|
99
|
+
# exit 1
|
100
|
+
# end
|
101
|
+
# preproc_exp = FrPrepConfigData.new(preproc_expname)
|
102
|
+
# @exp.adjoin(preproc_exp)
|
102
103
|
|
103
104
|
###
|
104
105
|
# find appropriate class for interpreting syntactic structures
|
data/lib/rosy/RosyInspect.rb
CHANGED
data/lib/rosy/RosyPruning.rb
CHANGED
data/lib/rosy/RosyServices.rb
CHANGED
data/lib/rosy/RosySplit.rb
CHANGED
data/lib/rosy/RosyTest.rb
CHANGED
@@ -24,7 +24,7 @@ require "rosy/RosyTrainingTestTable"
|
|
24
24
|
require "rosy/View"
|
25
25
|
|
26
26
|
# Frprep packages
|
27
|
-
require "common/
|
27
|
+
#require "common/prep_config_data" # AB: what the fuck???
|
28
28
|
|
29
29
|
##########################################################################
|
30
30
|
# classifier combination class
|
@@ -156,25 +156,28 @@ class RosyTest < RosyTask
|
|
156
156
|
|
157
157
|
##
|
158
158
|
# add preprocessing information to the experiment file object
|
159
|
-
|
160
|
-
|
161
|
-
|
162
|
-
|
163
|
-
|
164
|
-
|
165
|
-
|
166
|
-
|
167
|
-
|
168
|
-
|
169
|
-
|
170
|
-
|
171
|
-
|
172
|
-
|
173
|
-
|
174
|
-
|
175
|
-
|
176
|
-
|
177
|
-
|
159
|
+
# @note AB: Commented out due to separation of PrepConfigData:
|
160
|
+
# information for SynInterfaces required.
|
161
|
+
# if @splitID
|
162
|
+
# # use split data
|
163
|
+
# preproc_param = "preproc_descr_file_train"
|
164
|
+
# else
|
165
|
+
# # use test data
|
166
|
+
# preproc_param = "preproc_descr_file_test"
|
167
|
+
# end
|
168
|
+
|
169
|
+
# preproc_expname = @exp.get(preproc_param)
|
170
|
+
# if not(preproc_expname)
|
171
|
+
# $stderr.puts "Please set the name of the preprocessing exp. file name"
|
172
|
+
# $stderr.puts "in the experiment file, parameter #{preproc_param}."
|
173
|
+
# exit 1
|
174
|
+
# elsif not(File.readable?(preproc_expname))
|
175
|
+
# $stderr.puts "Error in the experiment file:"
|
176
|
+
# $stderr.puts "Parameter #{preproc_param} has to be a readable file."
|
177
|
+
# exit 1
|
178
|
+
# end
|
179
|
+
# preproc_exp = FrPrepConfigData.new(preproc_expname)
|
180
|
+
# @exp.adjoin(preproc_exp)
|
178
181
|
|
179
182
|
# announce the task
|
180
183
|
$stderr.puts "---------"
|
data/lib/rosy/RosyTrain.rb
CHANGED
@@ -18,7 +18,7 @@ require "rosy/RosyPruning"
|
|
18
18
|
require "common/ML"
|
19
19
|
|
20
20
|
# Frprep packages
|
21
|
-
require "common/
|
21
|
+
#require "common/prep_config_data"
|
22
22
|
|
23
23
|
class RosyTrain < RosyTask
|
24
24
|
|
@@ -68,18 +68,20 @@ class RosyTrain < RosyTask
|
|
68
68
|
|
69
69
|
##
|
70
70
|
# add preprocessing information to the experiment file object
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
71
|
+
# @note AB: Commented out due to separation of PrepConfigData.
|
72
|
+
# No information seems to be required.
|
73
|
+
# preproc_expname = @exp.get("preproc_descr_file_train")
|
74
|
+
# if not(preproc_expname)
|
75
|
+
# $stderr.puts "Please set the name of the preprocessing exp. file name"
|
76
|
+
# $stderr.puts "in the experiment file, parameter preproc_descr_file_train."
|
77
|
+
# exit 1
|
78
|
+
# elsif not(File.readable?(preproc_expname))
|
79
|
+
# $stderr.puts "Error in the experiment file:"
|
80
|
+
# $stderr.puts "Parameter preproc_descr_file_train has to be a readable file."
|
81
|
+
# exit 1
|
82
|
+
# end
|
83
|
+
# preproc_exp = FrPrepConfigData.new(preproc_expname)
|
84
|
+
# @exp.adjoin(preproc_exp)
|
83
85
|
|
84
86
|
|
85
87
|
# get_lf returns: array of pairs [classifier_name, options[array]]
|