shalmaneser 1.2.0.rc3 → 1.2.0.rc4

Sign up to get free protection for your applications and to get access to all the features.
Files changed (47) hide show
  1. checksums.yaml +4 -4
  2. data/README.md +26 -7
  3. data/bin/fred +2 -4
  4. data/doc/exp_files.md +6 -5
  5. data/lib/common/{ConfigData.rb → config_data.rb} +46 -270
  6. data/lib/common/config_format_element.rb +220 -0
  7. data/lib/common/prep_config_data.rb +62 -0
  8. data/lib/common/{frprep_helper.rb → prep_helper.rb} +0 -0
  9. data/lib/{common/DBInterface.rb → db/db_interface.rb} +2 -2
  10. data/lib/{rosy/DBMySQL.rb → db/db_mysql.rb} +1 -2
  11. data/lib/{rosy/DBSQLite.rb → db/db_sqlite.rb} +1 -1
  12. data/lib/{rosy/DBTable.rb → db/db_table.rb} +1 -1
  13. data/lib/{rosy/DBWrapper.rb → db/db_wrapper.rb} +0 -0
  14. data/lib/{common/SQLQuery.rb → db/sql_query.rb} +0 -0
  15. data/lib/fred/FredBOWContext.rb +8 -6
  16. data/lib/fred/FredDetermineTargets.rb +1 -1
  17. data/lib/fred/FredEval.rb +1 -1
  18. data/lib/fred/FredFeaturize.rb +22 -16
  19. data/lib/fred/FredTest.rb +0 -1
  20. data/lib/fred/fred.rb +2 -0
  21. data/lib/fred/{FredConfigData.rb → fred_config_data.rb} +70 -67
  22. data/lib/fred/opt_parser.rb +1 -1
  23. data/lib/frprep/frprep.rb +1 -1
  24. data/lib/frprep/interfaces/berkeley_interface.rb +7 -9
  25. data/lib/frprep/opt_parser.rb +1 -1
  26. data/lib/rosy/ExternalConfigData.rb +1 -1
  27. data/lib/rosy/RosyEval.rb +1 -1
  28. data/lib/rosy/RosyFeaturize.rb +21 -20
  29. data/lib/rosy/RosyInspect.rb +1 -1
  30. data/lib/rosy/RosyPruning.rb +1 -1
  31. data/lib/rosy/RosyServices.rb +1 -1
  32. data/lib/rosy/RosySplit.rb +1 -1
  33. data/lib/rosy/RosyTest.rb +23 -20
  34. data/lib/rosy/RosyTrain.rb +15 -13
  35. data/lib/rosy/RosyTrainingTestTable.rb +2 -1
  36. data/lib/rosy/View.rb +1 -1
  37. data/lib/rosy/opt_parser.rb +1 -1
  38. data/lib/rosy/rosy.rb +1 -1
  39. data/lib/rosy/rosy_config_data.rb +121 -0
  40. data/lib/shalmaneser/opt_parser.rb +32 -2
  41. data/lib/shalmaneser/version.rb +1 -1
  42. metadata +23 -114
  43. checksums.yaml.gz.sig +0 -0
  44. data.tar.gz.sig +0 -0
  45. data/lib/common/FrPrepConfigData.rb +0 -66
  46. data/lib/rosy/RosyConfigData.rb +0 -115
  47. metadata.gz.sig +0 -0
data/lib/fred/FredTest.rb CHANGED
@@ -16,7 +16,6 @@ require "common/SalsaTigerRegXML"
16
16
  require "common/ruby_class_extensions"
17
17
 
18
18
  # Shalmaneser packages
19
- require "common/FrPrepConfigData"
20
19
  require "common/ML"
21
20
  require "fred/Baseline"
22
21
  require "fred/FredConventions"
data/lib/fred/fred.rb CHANGED
@@ -34,6 +34,8 @@ module Fred
34
34
  task_obj = FredEval.new(@exp, @opts)
35
35
  else
36
36
  raise "Shouldn't be here"
37
+ # @todo AB: this <else> condition should be impossible
38
+ # do in OptionParser
37
39
  end
38
40
 
39
41
  task_obj.compute
@@ -4,7 +4,7 @@
4
4
  # Frame disambiguation system:
5
5
  # access to a configuration and experiment description file
6
6
 
7
- require "common/ConfigData"
7
+ require "common/config_data"
8
8
 
9
9
  ##############################
10
10
  # Class FredConfigData
@@ -13,71 +13,73 @@ require "common/ConfigData"
13
13
  # sets variable names appropriate to WSD task
14
14
 
15
15
  class FredConfigData < ConfigData
16
- def initialize(filename)
17
-
18
- # initialize config data object
19
- super(filename, # config file
20
- {
21
- "experiment_ID" => "string", # experiment ID
22
- "enduser_mode" => "bool", # work in enduser mode? (disallowing many things)
23
-
24
- "preproc_descr_file_train" => "string", # path to preprocessing files
25
- "preproc_descr_file_test" => "string",
26
- "directory_output" => "string", # path to Salsa/Tiger XML output directory
27
-
28
- "verbose" => "bool" , # print diagnostic messages?
29
- "apply_to_all_known_targets" => "bool", # apply to all known targets rather than the ones with a frame?
30
-
31
- "fred_directory" => "string",# directory for internal info
32
- "classifier_dir" => "string", # write classifiers here
33
-
34
- "classifier" => "list", # classifiers
35
-
36
- "dbtype" => "string", # "mysql" or "sqlite"
37
-
38
- "host" => "string", # DB access: sqlite only
39
- "user" => "string",
40
- "passwd" => "string",
41
- "dbname" => "string",
16
+ CONFIG_DEFS = {
17
+ "experiment_ID" => "string", # experiment ID
18
+ "enduser_mode" => "bool", # work in enduser mode? (disallowing many things)
19
+
20
+ "preproc_descr_file_train" => "string", # path to preprocessing files
21
+ "preproc_descr_file_test" => "string",
22
+ "directory_output" => "string", # path to Salsa/Tiger XML output directory
23
+
24
+ "verbose" => "bool" , # print diagnostic messages?
25
+ "apply_to_all_known_targets" => "bool", # apply to all known targets rather than the ones with a frame?
26
+
27
+ "fred_directory" => "string",# directory for internal info
28
+ "classifier_dir" => "string", # write classifiers here
29
+
30
+ "classifier" => "list", # classifiers
31
+
32
+ "dbtype" => "string", # "mysql" or "sqlite"
33
+
34
+ "host" => "string", # DB access: sqlite only
35
+ "user" => "string",
36
+ "passwd" => "string",
37
+ "dbname" => "string",
38
+
39
+ # featurization info
40
+ "feature" => "list", # which features to use for the classifier?
41
+ "binary_classifiers" => "bool",# make binary rather than n-ary classifiers?
42
+ "negsense" => "string", # binary classifier: negative sense is..?
43
+ "numerical_features" => "string", # do what with numerical features?
44
+
45
+ # what to do with items that have multiple senses?
46
+ # 'binarize': binary classifiers, and consider positive
47
+ # if the sense is among the gold senses
48
+ # 'join' : make one joint sense
49
+ # 'repeat' : make multiple occurrences of the item, one sense per occ
50
+ # 'keep' : keep as separate labels
51
+ #
52
+ # multilabel: consider as assigned all labels
53
+ # above a certain confidence threshold?
54
+ "handle_multilabel" => "string",
55
+ "assignment_confidence_threshold" => "float",
56
+
57
+ # single-sentence context?
58
+ "single_sent_context" => "bool",
59
+
60
+ # noncontiguous input? then we need access to a larger corpus
61
+ "noncontiguous_input" => "bool",
62
+ "larger_corpus_dir" => "string",
63
+ "larger_corpus_format" => "string",
64
+ "larger_corpus_encoding" => "string",
65
+ # Imported from PrepConfigData
66
+ 'do_postag' => 'bool',
67
+ 'do_lemmatize' => 'bool',
68
+ 'do_parse' => 'bool',
69
+ 'pos_tagger' => 'string',
70
+ 'lemmatizer' => 'string',
71
+ 'parser' => 'string',
72
+ 'directory_preprocessed' => 'string',
73
+ 'language' => 'string'
74
+ }
42
75
 
43
- # featurization info
44
- "feature" => "list", # which features to use for the classifier?
45
- "binary_classifiers" => "bool",# make binary rather than n-ary clasifiers?
46
- "negsense" => "string", # binary classifier: negative sense is..?
47
- "numerical_features" => "string", # do what with numerical features?
48
-
49
- # what to do with items that have multiple senses?
50
- # 'binarize': binary classifiers, and consider positive
51
- # if the sense is among the gold senses
52
- # 'join' : make one joint sense
53
- # 'repeat' : make multiple occurrences of the item, one sense per occ
54
- # 'keep' : keep as separate labels
55
- #
56
- # multilabel: consider as assigned all labels
57
- # above a certain confidence threshold?
58
- "handle_multilabel" => "string",
59
- "assignment_confidence_threshold" => "float",
60
-
61
- # single-sentence context?
62
- "single_sent_context" => "bool",
76
+ def initialize(filename)
63
77
 
64
- # noncontiguous input? then we need access to a larger corpus
65
- "noncontiguous_input" => "bool",
66
- "larger_corpus_dir" => "string",
67
- "larger_corpus_format" => "string",
68
- "larger_corpus_encoding" => "string"
69
- },
70
- [ # variables
71
- "train",
72
- "exp_ID"
73
- ]
74
- )
78
+ super(filename, CONFIG_DEFS, ["train", "exp_ID"])
75
79
 
76
80
  # set access functions for list features
77
- set_list_feature_access("classifier",
78
- method("access_classifier"))
79
- set_list_feature_access("feature",
80
- method("access_feature"))
81
+ set_list_feature_access("classifier", method("access_classifier"))
82
+ set_list_feature_access("feature", method("access_feature"))
81
83
  end
82
84
 
83
85
  ###
@@ -165,14 +167,15 @@ class FredConfigData < ConfigData
165
167
  #
166
168
  # returns: a list of pairs [feature_name(string), options(array:string)]
167
169
  # of defined features
168
- def access_classifier(val_list) # array:array:string: list of tuples defined in config file
169
- # for feature 'feature'
170
+ # @param val_list [Array] array:array:string: list of tuples defined
171
+ # in config file for feature 'classifier'
172
+ def access_classifier(val_list)
170
173
  if val_list.nil?
171
- return []
174
+ []
172
175
  else
173
- return val_list.map { |cl_descr_tuple|
176
+ val_list.map do |cl_descr_tuple|
174
177
  [cl_descr_tuple.first, cl_descr_tuple[1..-1]]
175
- }
178
+ end
176
179
  end
177
180
  end
178
181
 
@@ -4,7 +4,7 @@
4
4
 
5
5
  #require 'optparse' # for reimplementation
6
6
  require 'getoptlong'
7
- require "fred/FredConfigData"
7
+ require "fred/fred_config_data"
8
8
 
9
9
  module Fred
10
10
 
data/lib/frprep/frprep.rb CHANGED
@@ -1,5 +1,5 @@
1
1
  require 'frprep/do_parses'
2
- require 'common/frprep_helper'
2
+ require 'common/prep_helper'
3
3
  require 'common/FixSynSemMapping'
4
4
  # For FN input.
5
5
  require 'frprep/FNCorpusXML'
@@ -63,12 +63,9 @@ class BerkeleyInterface < SynInterfaceSTXML
63
63
 
64
64
  parser = ENV['SHALM_BERKELEY_BIN'] || 'berkeleyParser.jar'
65
65
  grammar = ENV['SHALM_BERKELEY_MODEL'] || 'grammar.gr'
66
+ options = ENV['SHALM_BERKELEY_OPTIONS']
66
67
 
67
- #berkeley_prog = "java -Xmx2000m -jar #{@program_path}berkeleyParser.jar -gr #{@program_path}ger_sm5.gr"
68
-
69
- #berkeley_prog = "java -d64 -Xmx10000m -jar #{@program_path}berkeley-parser.jar -gr #{@program_path}gerNegra.01.utf8 "
70
-
71
- berkeley_prog = "java -jar #{@program_path}#{parser} -gr #{@program_path}#{grammar}"
68
+ berkeley_prog = "java -jar #{@program_path}#{parser} #{options} -gr #{@program_path}#{grammar}"
72
69
 
73
70
  Dir[in_dir + "*" + @insuffix].each do |inputfilename|
74
71
 
@@ -139,10 +136,10 @@ class BerkeleyInterface < SynInterfaceSTXML
139
136
  # - a parse beginning with <( (>, <( (TOP>, <( (VROOT> etc.
140
137
  # TOP - Negra Grammars
141
138
  # VROOT - Tiger Grammars
142
- # PSEUDE - Original BP Grammars
139
+ # PSEUDO - Original BP Grammars
143
140
  # ROOT - some english grammars
144
141
  # empty identifiers for older Tiger grammars
145
- if line.nil? or line=~/^\( *\((PSEUDO|TOP|ROOT|VROOT)? / or line=~/^\(\(\)/
142
+ if line.nil? or line=~/^(\( *)?\((PSEUDO|TOP|ROOT|VROOT)? / or line=~/^\(\(\)/
146
143
  break
147
144
  end
148
145
  sentid +=1
@@ -157,8 +154,9 @@ class BerkeleyInterface < SynInterfaceSTXML
157
154
  # Insert a top node <VROOT> if missing.
158
155
  # Some grammars trained on older Tiger Versions
159
156
  # expose this problem.
160
- line.sub!(/^(\(\s+\(\s+)/, '\1VROOT')
161
-
157
+ #STDERR.puts "@@@1 <#{line}>"
158
+ line.sub!(/^(\(\s+\()(\s+)/, '\1VROOT\2')
159
+ #STDERR.puts "@@@2 <#{line}>"
162
160
  # berkeley parser output: remove brackets /(.*)/
163
161
  # Remove leading and trailing top level brackets.
164
162
  line.sub!(/^\( */, '')
@@ -3,7 +3,7 @@
3
3
  # AB, 2010-11-25
4
4
 
5
5
  require 'optparse'
6
- require 'common/FrPrepConfigData'
6
+ require 'common/prep_config_data'
7
7
  require 'common/SynInterfaces'
8
8
  module FrPrep
9
9
 
@@ -5,7 +5,7 @@
5
5
  # for Fred and Rosy:
6
6
  # access to configuration and experiment description file
7
7
 
8
- require 'common/ConfigData'
8
+ require 'common/config_data'
9
9
 
10
10
  ##############################
11
11
  # Class ExternalConfigData
data/lib/rosy/RosyEval.rb CHANGED
@@ -19,7 +19,7 @@ require "rosy/RosyTask"
19
19
  require "rosy/RosyPruning"
20
20
 
21
21
  # Frprep packages
22
- require "common/FrPrepConfigData"
22
+ require "common/prep_config_data"
23
23
 
24
24
  #######################################################################
25
25
  # This class is a subclass of the general evaluation class
@@ -9,13 +9,13 @@ require "common/SynInterfaces"
9
9
  require "common/ruby_class_extensions"
10
10
 
11
11
  # Frprep packages
12
- require "common/FrPrepConfigData"
12
+ #require "common/prep_config_data"
13
13
 
14
14
  # Rosy packages
15
15
  require "rosy/FailedParses"
16
16
  require "rosy/FeatureInfo"
17
17
  require "rosy/InputData"
18
- require "rosy/RosyConfigData"
18
+ require "rosy/rosy_config_data"
19
19
  require "common/RosyConventions"
20
20
  require "rosy/RosySplit"
21
21
  require "rosy/RosyTask"
@@ -81,24 +81,25 @@ class RosyFeaturize < RosyTask
81
81
 
82
82
  ##
83
83
  # add preprocessing information to the experiment file object
84
- if @dataset
85
- preproc_parameter = "preproc_descr_file_" + @dataset
86
- else
87
- # split data
88
- preproc_parameter = "preproc_descr_file_train"
89
- end
90
- preproc_expname = @exp.get(preproc_parameter)
91
- if not(preproc_expname)
92
- $stderr.puts "Please set the name of the preprocessing exp. file name"
93
- $stderr.puts "in the experiment file, parameter #{preproc_parameter}"
94
- exit 1
95
- elsif not(File.readable?(preproc_expname))
96
- $stderr.puts "Error in the experiment file:"
97
- $stderr.puts "Parameter #{preproc_parameter} has to be a readable file."
98
- exit 1
99
- end
100
- preproc_exp = FrPrepConfigData.new(preproc_expname)
101
- @exp.adjoin(preproc_exp)
84
+ # @note AB: Commented out due to separation of PrepConfigData.
85
+ # if @dataset
86
+ # preproc_parameter = "preproc_descr_file_" + @dataset
87
+ # else
88
+ # # split data
89
+ # preproc_parameter = "preproc_descr_file_train"
90
+ # end
91
+ # preproc_expname = @exp.get(preproc_parameter)
92
+ # if not(preproc_expname)
93
+ # $stderr.puts "Please set the name of the preprocessing exp. file name"
94
+ # $stderr.puts "in the experiment file, parameter #{preproc_parameter}"
95
+ # exit 1
96
+ # elsif not(File.readable?(preproc_expname))
97
+ # $stderr.puts "Error in the experiment file:"
98
+ # $stderr.puts "Parameter #{preproc_parameter} has to be a readable file."
99
+ # exit 1
100
+ # end
101
+ # preproc_exp = FrPrepConfigData.new(preproc_expname)
102
+ # @exp.adjoin(preproc_exp)
102
103
 
103
104
  ###
104
105
  # find appropriate class for interpreting syntactic structures
@@ -12,7 +12,7 @@ require "rosy/RosyTrainingTestTable"
12
12
  require "rosy/View"
13
13
 
14
14
  # Frprep packages
15
- require "common/FrPrepConfigData"
15
+ require "common/prep_config_data"
16
16
 
17
17
  class RosyInspect < RosyTask
18
18
 
@@ -12,7 +12,7 @@ require "common/ruby_class_extensions"
12
12
 
13
13
  require "rosy/RosyFeatureExtractors"
14
14
  require "common/RosyConventions"
15
- require "rosy/RosyConfigData"
15
+ require "rosy/rosy_config_data"
16
16
  require "rosy/RosyIterator"
17
17
 
18
18
  ###
@@ -16,7 +16,7 @@ require "rosy/RosyTrainingTestTable"
16
16
  require "rosy/View"
17
17
 
18
18
  # Frprep packages
19
- require "common/FrPrepConfigData"
19
+ require "common/prep_config_data"
20
20
 
21
21
  ###################################################
22
22
  class RosyServices < RosyTask
@@ -16,7 +16,7 @@
16
16
  require "common/ruby_class_extensions"
17
17
 
18
18
  # Frprep packages
19
- require "common/FrPrepConfigData"
19
+ require "common/prep_config_data"
20
20
 
21
21
  # Rosy packages
22
22
  require "rosy/FailedParses"
data/lib/rosy/RosyTest.rb CHANGED
@@ -24,7 +24,7 @@ require "rosy/RosyTrainingTestTable"
24
24
  require "rosy/View"
25
25
 
26
26
  # Frprep packages
27
- require "common/FrPrepConfigData" # AB: what the fuck???
27
+ #require "common/prep_config_data" # AB: what the fuck???
28
28
 
29
29
  ##########################################################################
30
30
  # classifier combination class
@@ -156,25 +156,28 @@ class RosyTest < RosyTask
156
156
 
157
157
  ##
158
158
  # add preprocessing information to the experiment file object
159
- if @splitID
160
- # use split data
161
- preproc_param = "preproc_descr_file_train"
162
- else
163
- # use test data
164
- preproc_param = "preproc_descr_file_test"
165
- end
166
- preproc_expname = @exp.get(preproc_param)
167
- if not(preproc_expname)
168
- $stderr.puts "Please set the name of the preprocessing exp. file name"
169
- $stderr.puts "in the experiment file, parameter #{preproc_param}."
170
- exit 1
171
- elsif not(File.readable?(preproc_expname))
172
- $stderr.puts "Error in the experiment file:"
173
- $stderr.puts "Parameter #{preproc_param} has to be a readable file."
174
- exit 1
175
- end
176
- preproc_exp = FrPrepConfigData.new(preproc_expname)
177
- @exp.adjoin(preproc_exp)
159
+ # @note AB: Commented out due to separation of PrepConfigData:
160
+ # information for SynInteraces required.
161
+ # if @splitID
162
+ # # use split data
163
+ # preproc_param = "preproc_descr_file_train"
164
+ # else
165
+ # # use test data
166
+ # preproc_param = "preproc_descr_file_test"
167
+ # end
168
+
169
+ # preproc_expname = @exp.get(preproc_param)
170
+ # if not(preproc_expname)
171
+ # $stderr.puts "Please set the name of the preprocessing exp. file name"
172
+ # $stderr.puts "in the experiment file, parameter #{preproc_param}."
173
+ # exit 1
174
+ # elsif not(File.readable?(preproc_expname))
175
+ # $stderr.puts "Error in the experiment file:"
176
+ # $stderr.puts "Parameter #{preproc_param} has to be a readable file."
177
+ # exit 1
178
+ # end
179
+ # preproc_exp = FrPrepConfigData.new(preproc_expname)
180
+ # @exp.adjoin(preproc_exp)
178
181
 
179
182
  # announce the task
180
183
  $stderr.puts "---------"
@@ -18,7 +18,7 @@ require "rosy/RosyPruning"
18
18
  require "common/ML"
19
19
 
20
20
  # Frprep packages
21
- require "common/FrPrepConfigData"
21
+ #require "common/prep_config_data"
22
22
 
23
23
  class RosyTrain < RosyTask
24
24
 
@@ -68,18 +68,20 @@ class RosyTrain < RosyTask
68
68
 
69
69
  ##
70
70
  # add preprocessing information to the experiment file object
71
- preproc_expname = @exp.get("preproc_descr_file_train")
72
- if not(preproc_expname)
73
- $stderr.puts "Please set the name of the preprocessing exp. file name"
74
- $stderr.puts "in the experiment file, parameter preproc_descr_file_train."
75
- exit 1
76
- elsif not(File.readable?(preproc_expname))
77
- $stderr.puts "Error in the experiment file:"
78
- $stderr.puts "Parameter preproc_descr_file_train has to be a readable file."
79
- exit 1
80
- end
81
- preproc_exp = FrPrepConfigData.new(preproc_expname)
82
- @exp.adjoin(preproc_exp)
71
+ # @note AB: Commented out due to separation of PrepConfigData.
72
+ # No information seems to be required.
73
+ # preproc_expname = @exp.get("preproc_descr_file_train")
74
+ # if not(preproc_expname)
75
+ # $stderr.puts "Please set the name of the preprocessing exp. file name"
76
+ # $stderr.puts "in the experiment file, parameter preproc_descr_file_train."
77
+ # exit 1
78
+ # elsif not(File.readable?(preproc_expname))
79
+ # $stderr.puts "Error in the experiment file:"
80
+ # $stderr.puts "Parameter preproc_descr_file_train has to be a readable file."
81
+ # exit 1
82
+ # end
83
+ # preproc_exp = FrPrepConfigData.new(preproc_expname)
84
+ # @exp.adjoin(preproc_exp)
83
85
 
84
86
 
85
87
  # get_lf returns: array of pairs [classifier_name, options[array]]