shalmaneser 1.2.0.rc3 → 1.2.0.rc4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (47)
  1. checksums.yaml +4 -4
  2. data/README.md +26 -7
  3. data/bin/fred +2 -4
  4. data/doc/exp_files.md +6 -5
  5. data/lib/common/{ConfigData.rb → config_data.rb} +46 -270
  6. data/lib/common/config_format_element.rb +220 -0
  7. data/lib/common/prep_config_data.rb +62 -0
  8. data/lib/common/{frprep_helper.rb → prep_helper.rb} +0 -0
  9. data/lib/{common/DBInterface.rb → db/db_interface.rb} +2 -2
  10. data/lib/{rosy/DBMySQL.rb → db/db_mysql.rb} +1 -2
  11. data/lib/{rosy/DBSQLite.rb → db/db_sqlite.rb} +1 -1
  12. data/lib/{rosy/DBTable.rb → db/db_table.rb} +1 -1
  13. data/lib/{rosy/DBWrapper.rb → db/db_wrapper.rb} +0 -0
  14. data/lib/{common/SQLQuery.rb → db/sql_query.rb} +0 -0
  15. data/lib/fred/FredBOWContext.rb +8 -6
  16. data/lib/fred/FredDetermineTargets.rb +1 -1
  17. data/lib/fred/FredEval.rb +1 -1
  18. data/lib/fred/FredFeaturize.rb +22 -16
  19. data/lib/fred/FredTest.rb +0 -1
  20. data/lib/fred/fred.rb +2 -0
  21. data/lib/fred/{FredConfigData.rb → fred_config_data.rb} +70 -67
  22. data/lib/fred/opt_parser.rb +1 -1
  23. data/lib/frprep/frprep.rb +1 -1
  24. data/lib/frprep/interfaces/berkeley_interface.rb +7 -9
  25. data/lib/frprep/opt_parser.rb +1 -1
  26. data/lib/rosy/ExternalConfigData.rb +1 -1
  27. data/lib/rosy/RosyEval.rb +1 -1
  28. data/lib/rosy/RosyFeaturize.rb +21 -20
  29. data/lib/rosy/RosyInspect.rb +1 -1
  30. data/lib/rosy/RosyPruning.rb +1 -1
  31. data/lib/rosy/RosyServices.rb +1 -1
  32. data/lib/rosy/RosySplit.rb +1 -1
  33. data/lib/rosy/RosyTest.rb +23 -20
  34. data/lib/rosy/RosyTrain.rb +15 -13
  35. data/lib/rosy/RosyTrainingTestTable.rb +2 -1
  36. data/lib/rosy/View.rb +1 -1
  37. data/lib/rosy/opt_parser.rb +1 -1
  38. data/lib/rosy/rosy.rb +1 -1
  39. data/lib/rosy/rosy_config_data.rb +121 -0
  40. data/lib/shalmaneser/opt_parser.rb +32 -2
  41. data/lib/shalmaneser/version.rb +1 -1
  42. metadata +23 -114
  43. checksums.yaml.gz.sig +0 -0
  44. data.tar.gz.sig +0 -0
  45. data/lib/common/FrPrepConfigData.rb +0 -66
  46. data/lib/rosy/RosyConfigData.rb +0 -115
  47. metadata.gz.sig +0 -0
@@ -33,9 +33,10 @@
33
33
 
34
34
  require "common/ruby_class_extensions"
35
35
 
36
- require "rosy/DBTable"
36
+ require 'db/db_table'
37
37
  require "rosy/FeatureInfo"
38
38
 
39
+ # @note AB: Possibly this file belongs to <lib/db>. Check it!
39
40
  ######################
40
41
  class RosyTrainingTestTable
41
42
  attr_reader :database, :maintable_name, :feature_names, :feature_info
data/lib/rosy/View.rb CHANGED
@@ -11,7 +11,7 @@
11
11
  # It can be returned directly, or modified by some "dynamic feature object",
12
12
  # and its value (modified or unmodified) will always be last in the array representation of a row.
13
13
 
14
- require "common/SQLQuery"
14
+ require 'db/sql_query'
15
15
  require "common/ruby_class_extensions"
16
16
  require "common/RosyConventions"
17
17
 
@@ -2,7 +2,7 @@
2
2
 
3
3
  require 'getoptlong'
4
4
 
5
- require 'rosy/RosyConfigData'
5
+ require 'rosy/rosy_config_data'
6
6
 
7
7
  module Rosy
8
8
 
data/lib/rosy/rosy.rb CHANGED
@@ -1,7 +1,7 @@
1
1
  # AB: 2011-11-14
2
2
  # Initial import done, need to reimplement the whole interface.
3
3
 
4
- require 'common/DBInterface'
4
+ require 'db/db_interface'
5
5
  require 'rosy/RosyFeaturize'
6
6
  require 'rosy/RosyTest'
7
7
  require 'rosy/RosyTrain'
@@ -0,0 +1,121 @@
1
+ require 'common/config_data'
2
+
3
+ ##############################
4
+ # Class RosyConfigData
5
+ #
6
+ # inherits from ConfigData,
7
+ # sets features for ROSY
8
+
9
+ class RosyConfigData < ConfigData
10
+ CONFIG_DEFS = { # features
11
+ "feature" => "list",
12
+ "classifier" => "list",
13
+
14
+ "verbose" => "bool" ,
15
+ "enduser_mode" => "bool",
16
+
17
+ "experiment_ID" => "string",
18
+
19
+ "directory_input_train" => "string",
20
+ "directory_input_test" => "string",
21
+ "directory_output" => "string",
22
+
23
+ "preproc_descr_file_train" => "string",
24
+ "preproc_descr_file_test" => "string",
25
+ "external_descr_file" => "string",
26
+
27
+ "dbtype" => "string", # "mysql" or "sqlite"
28
+
29
+ "host" => "string", # DB access: sqlite only
30
+ "user" => "string",
31
+ "passwd" => "string",
32
+ "dbname" => "string",
33
+
34
+ "data_dir" => "string", # for external use
35
+ "rosy_dir" => "pattern", # for internal use only, set by rosy.rb
36
+
37
+ "classifier_dir" => "string", # if present, special directory for classifiers
38
+
39
+ "classif_column_name" => "string",
40
+ "main_table_name" => "pattern",
41
+ "test_table_name" => "pattern",
42
+
43
+ "eval_file" => "pattern",
44
+ "log_file" => "pattern",
45
+ "failed_file" => "pattern",
46
+ "classifier_file" => "pattern",
47
+ "classifier_output_file" => "pattern",
48
+ "noval" => "string",
49
+
50
+
51
+ "split_nones" => "bool",
52
+ "print_eval_log" => "bool",
53
+ "assume_argrec_perfect" => "bool",
54
+ "xwise_argrec" => "string",
55
+ "xwise_arglab" => "string",
56
+ "xwise_onestep" => "string",
57
+
58
+ "fe_syn_repair" => "bool", # map words to constituents for FEs: idealize?
59
+ "fe_rel_repair" => "bool", # FEs: include non-included relative clauses into FEs
60
+ "prune" => "string", # pruning prior to argrec?
61
+
62
+ # Imported from PrepConfigData
63
+ 'do_postag' => 'bool',
64
+ 'do_lemmatize' => 'bool',
65
+ 'do_parse' => 'bool',
66
+ 'pos_tagger' => 'string',
67
+ 'lemmatizer' => 'string',
68
+ 'parser' => 'string'
69
+ }
70
+
71
+ def initialize(filename)
72
+ super(filename, CONFIG_DEFS, ["exp_ID", "test_ID", "split_ID",
73
+ "feature_name", "classif", "step",
74
+ "group", "dataset","mode"])
75
+
76
+ # set access functions for list features
77
+ set_list_feature_access("feature",
78
+ method("access_feature"))
79
+
80
+ # set access functions for list features
81
+ set_list_feature_access("classifier",
82
+ method("access_feature"))
83
+
84
+ end
85
+
86
+ ###
87
+ # protected
88
+
89
+ #####
90
+ # access_feature
91
+ #
92
+ # access function for feature 'feature'
93
+ #
94
+ # assumed format in the config file:
95
+ #
96
+ # feature = path [option]*
97
+ #
98
+ # i.e. first the name of the feature type to use, then
99
+ # optionally options associated with that feature,
100
+ # e.g. 'argrec': use that feature only when computing argrec
101
+ #
102
+ # the access function is called with parameter val_list, an array of
103
+ # string tuples, one string tuple for each feature defined.
104
+ # the first string in the tuple is the feature name, the rest are the options
105
+ #
106
+ # returns: a list of pairs [feature_name(string), options(array:string)]
107
+ # of defined features
108
+ def access_feature(val_list) # array:array:string: list of tuples defined in config file
109
+ # for feature 'feature'
110
+ if val_list.nil?
111
+ []
112
+ else
113
+ val_list.map do |feature_descr_tuple|
114
+ [feature_descr_tuple.first, feature_descr_tuple[1..-1]]
115
+ end
116
+ end
117
+ end
118
+ end
119
+
120
+
121
+
@@ -4,6 +4,12 @@ require 'shalmaneser/version'
4
4
 
5
5
  module Shalmaneser
6
6
  class OptParser
7
+
8
+ # Specify a default option first.
9
+ ENCODINGS = %w{iso utf8 hex}
10
+ LANGUAGES = %w{de en}
11
+ PARSERS = %w{BerkeleyParser StanfordParser CollinsParser}
12
+
7
13
  def self.parse(cmd_args)
8
14
 
9
15
  parser = create_parser
@@ -32,10 +38,34 @@ module Shalmaneser
32
38
 
33
39
  def self.create_parser
34
40
  OptionParser.new do |opts|
35
- opts.banner = 'Usage: shalmaneser OPTIONS'
41
+ opts.banner = "CAUTION: Shalmaneser DOES NOT work in Enduser Mode for now!\n" +
42
+ 'Usage: shalmaneser -i path [-o path -e enc -l lang -p parser]'
43
+ opts.separator ''
44
+ opts.separator 'Mandatory options:'
45
+ opts.on('-i', '--input INPUTPATH', String,
46
+ 'Path to directory with input files.')
47
+ opts.separator ''
48
+
49
+ opts.separator 'Facultative options:'
50
+ opts.on('-o', '--output OUTPUTPATH', String,
51
+ 'Path to directory for output files.',
52
+ 'If not set it defaults to <users home directory>.')
53
+ opts.on('-e', '--encoding ENCODING', ENCODINGS,
54
+ "Encoding of input files. Allowed encodings are: #{ENCODINGS.join(', ')}.",
55
+ "If not set it defaults to <#{ENCODINGS.first}>.")
56
+ opts.on('-l', '--language LANGUAGE', LANGUAGES,
57
+ "Language to be processed. Allowed language are: #{LANGUAGES.join(', ')}.",
58
+ "If not set it defaults to <#{LANGUAGES.first}>.")
59
+ opts.on('-p', '--parser PARSER', PARSERS,
60
+ "Parser name you want to use.",
61
+ "Implemented parsers are: #{PARSERS.join(', ')}.",
62
+ "If not set it defaults to <#{PARSERS.first}>.")
63
+ opts.on('--visualize', 'Open output files with SALTO.',
64
+ 'This is ignored if SALTO is not found on your system.')
65
+
36
66
  opts.separator ''
37
67
  opts.separator 'Common options:'
38
-
68
+
39
69
  opts.on_tail('-h', '--help', 'Show the help message.') do
40
70
  puts opts
41
71
  exit
@@ -1,3 +1,3 @@
1
1
  module Shalmaneser
2
- VERSION = '1.2.0.rc3'
2
+ VERSION = '1.2.0.rc4'
3
3
  end
metadata CHANGED
@@ -1,107 +1,15 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: shalmaneser
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.2.0.rc3
4
+ version: 1.2.0.rc4
5
5
  platform: ruby
6
6
  authors:
7
7
  - Andrei Beliankou
8
8
  autorequire:
9
9
  bindir: bin
10
- cert_chain:
11
- - |
12
- -----BEGIN CERTIFICATE-----
13
- MIIDZDCCAkygAwIBAgIBATANBgkqhkiG9w0BAQUFADA8MQ4wDAYDVQQDDAVhcmJv
14
- eDEWMBQGCgmSJomT8ixkARkWBnlhbmRleDESMBAGCgmSJomT8ixkARkWAnJ1MB4X
15
- DTE0MDEwNjE1NDU0MFoXDTE1MDEwNjE1NDU0MFowPDEOMAwGA1UEAwwFYXJib3gx
16
- FjAUBgoJkiaJk/IsZAEZFgZ5YW5kZXgxEjAQBgoJkiaJk/IsZAEZFgJydTCCASIw
17
- DQYJKoZIhvcNAQEBBQADggEPADCCAQoCggEBAKpdkXWo8sFAq/Dd+rCLRCKHpH02
18
- 8cZsiy3Dx5kt9qpjYn/LX4/QlJ2mc2C3QXUr++DFJjA0K3TcRS2esUVS9ZlNMDM9
19
- YQnxFmPJ4tfpsMiteQMBVqU643aZrh64rqddklg8BwRec+prIIDxfQHzXalnNBad
20
- YfiHhjgTh5YQsx3Q0zidhlAtsIbJljaNLuJ4DiVQUtjumEnOI0HTLTuUdpg/Hhh+
21
- nPlnhwOUBGzj5hUGzf9QcbV2k99KXsKlHQVkMDn7gsXuIKsisVde07lUbhhR7YGy
22
- Z3vGnZK7oNI0It0LIBm7pdx2gtB4YG9O5QKEJo0WzLY60TiY8DzDguLndIcCAwEA
23
- AaNxMG8wCQYDVR0TBAIwADALBgNVHQ8EBAMCBLAwHQYDVR0OBBYEFHhWOk+TWhtU
24
- KMnM8ZyfBZYcVXxDMBoGA1UdEQQTMBGBD2FyYm94QHlhbmRleC5ydTAaBgNVHRIE
25
- EzARgQ9hcmJveEB5YW5kZXgucnUwDQYJKoZIhvcNAQEFBQADggEBAF2Y+mc/uTug
26
- OX3ivVkD4AaPpFsB2EglJhQxivlAHkix593RpZPXNf6jeu36oRCV/vRFLkzzaZ73
27
- N7MaI5Z2HczDkZvi8ZZM5L3p4wHttquranUdI3bZv4SiAVFmhkeFZLSp6pFf/Fmg
28
- qmEeXWVbsCIhYI7KYQ0XKbnRuj9AmjUEoMBZPnMsM1S/R+dBQfrUszXROWqxaENA
29
- 728ScNHCmRYuNutDO9yRDJT1SRumpgwH4df6c0LHBCuXuQTWODYqc/CDZJJb9Tfi
30
- BJreIpPMe0KFMphkN/x5cHkRDtMoY+rBGcqRe60otCEsAHdM+CXox9tAREnr/4lT
31
- Jn9sRDVszy4=
32
- -----END CERTIFICATE-----
33
- date: 2014-01-11 00:00:00.000000000 Z
34
- dependencies:
35
- - !ruby/object:Gem::Dependency
36
- name: mysql
37
- requirement: !ruby/object:Gem::Requirement
38
- requirements:
39
- - - '>='
40
- - !ruby/object:Gem::Version
41
- version: '0'
42
- type: :runtime
43
- prerelease: false
44
- version_requirements: !ruby/object:Gem::Requirement
45
- requirements:
46
- - - '>='
47
- - !ruby/object:Gem::Version
48
- version: '0'
49
- - !ruby/object:Gem::Dependency
50
- name: rdoc
51
- requirement: !ruby/object:Gem::Requirement
52
- requirements:
53
- - - '>='
54
- - !ruby/object:Gem::Version
55
- version: '0'
56
- type: :development
57
- prerelease: false
58
- version_requirements: !ruby/object:Gem::Requirement
59
- requirements:
60
- - - '>='
61
- - !ruby/object:Gem::Version
62
- version: '0'
63
- - !ruby/object:Gem::Dependency
64
- name: bundler
65
- requirement: !ruby/object:Gem::Requirement
66
- requirements:
67
- - - '>='
68
- - !ruby/object:Gem::Version
69
- version: '0'
70
- type: :development
71
- prerelease: false
72
- version_requirements: !ruby/object:Gem::Requirement
73
- requirements:
74
- - - '>='
75
- - !ruby/object:Gem::Version
76
- version: '0'
77
- - !ruby/object:Gem::Dependency
78
- name: yard
79
- requirement: !ruby/object:Gem::Requirement
80
- requirements:
81
- - - '>='
82
- - !ruby/object:Gem::Version
83
- version: '0'
84
- type: :development
85
- prerelease: false
86
- version_requirements: !ruby/object:Gem::Requirement
87
- requirements:
88
- - - '>='
89
- - !ruby/object:Gem::Version
90
- version: '0'
91
- - !ruby/object:Gem::Dependency
92
- name: rake
93
- requirement: !ruby/object:Gem::Requirement
94
- requirements:
95
- - - '>='
96
- - !ruby/object:Gem::Version
97
- version: '0'
98
- type: :development
99
- prerelease: false
100
- version_requirements: !ruby/object:Gem::Requirement
101
- requirements:
102
- - - '>='
103
- - !ruby/object:Gem::Version
104
- version: '0'
10
+ cert_chain: []
11
+ date: 2015-09-04 00:00:00.000000000 Z
12
+ dependencies: []
105
13
  description: |
106
14
  SHALMANESER - SHALlow seMANtic parSER. This package provides a toolbox for
107
15
  Semantic Role Labeling (SRL). SHALMANESER uses supervised learning algorithms to
@@ -123,7 +31,7 @@ extra_rdoc_files:
123
31
  - doc/exp_files.md
124
32
  - doc/index.md
125
33
  files:
126
- - .yardopts
34
+ - ".yardopts"
127
35
  - CHANGELOG.md
128
36
  - LICENSE.md
129
37
  - README.md
@@ -134,13 +42,10 @@ files:
134
42
  - doc/exp_files.md
135
43
  - doc/index.md
136
44
  - lib/common/AbstractSynInterface.rb
137
- - lib/common/ConfigData.rb
138
45
  - lib/common/Counter.rb
139
- - lib/common/DBInterface.rb
140
46
  - lib/common/EnduserMode.rb
141
47
  - lib/common/Eval.rb
142
48
  - lib/common/FixSynSemMapping.rb
143
- - lib/common/FrPrepConfigData.rb
144
49
  - lib/common/Graph.rb
145
50
  - lib/common/ISO-8859-1.rb
146
51
  - lib/common/ML.rb
@@ -150,7 +55,6 @@ files:
150
55
  - lib/common/Parser.rb
151
56
  - lib/common/RegXML.rb
152
57
  - lib/common/RosyConventions.rb
153
- - lib/common/SQLQuery.rb
154
58
  - lib/common/STXmlTerminalOrder.rb
155
59
  - lib/common/SalsaTigerRegXML.rb
156
60
  - lib/common/SalsaTigerXMLHelper.rb
@@ -159,16 +63,24 @@ files:
159
63
  - lib/common/Tiger.rb
160
64
  - lib/common/Timbl.rb
161
65
  - lib/common/Tree.rb
162
- - lib/common/frprep_helper.rb
66
+ - lib/common/config_data.rb
67
+ - lib/common/config_format_element.rb
163
68
  - lib/common/headz.rb
164
69
  - lib/common/option_parser.rb
70
+ - lib/common/prep_config_data.rb
71
+ - lib/common/prep_helper.rb
165
72
  - lib/common/ruby_class_extensions.rb
73
+ - lib/db/db_interface.rb
74
+ - lib/db/db_mysql.rb
75
+ - lib/db/db_sqlite.rb
76
+ - lib/db/db_table.rb
77
+ - lib/db/db_wrapper.rb
78
+ - lib/db/sql_query.rb
166
79
  - lib/ext/maxent/Classify.class
167
80
  - lib/ext/maxent/Train.class
168
81
  - lib/fred/Baseline.rb
169
82
  - lib/fred/FileZipped.rb
170
83
  - lib/fred/FredBOWContext.rb
171
- - lib/fred/FredConfigData.rb
172
84
  - lib/fred/FredConventions.rb
173
85
  - lib/fred/FredDetermineTargets.rb
174
86
  - lib/fred/FredEval.rb
@@ -183,6 +95,7 @@ files:
183
95
  - lib/fred/FredTrain.rb
184
96
  - lib/fred/PlotAndREval.rb
185
97
  - lib/fred/fred.rb
98
+ - lib/fred/fred_config_data.rb
186
99
  - lib/fred/md5.rb
187
100
  - lib/fred/opt_parser.rb
188
101
  - lib/frprep/Ampersand.rb
@@ -208,17 +121,12 @@ files:
208
121
  - lib/frprep/opt_parser.rb
209
122
  - lib/frprep/ruby_class_extensions.rb
210
123
  - lib/rosy/AbstractFeatureAndExternal.rb
211
- - lib/rosy/DBMySQL.rb
212
- - lib/rosy/DBSQLite.rb
213
- - lib/rosy/DBTable.rb
214
- - lib/rosy/DBWrapper.rb
215
124
  - lib/rosy/ExternalConfigData.rb
216
125
  - lib/rosy/FailedParses.rb
217
126
  - lib/rosy/FeatureInfo.rb
218
127
  - lib/rosy/GfInduce.rb
219
128
  - lib/rosy/GfInduceFeature.rb
220
129
  - lib/rosy/InputData.rb
221
- - lib/rosy/RosyConfigData.rb
222
130
  - lib/rosy/RosyConfusability.rb
223
131
  - lib/rosy/RosyEval.rb
224
132
  - lib/rosy/RosyFeatureExtractors.rb
@@ -237,6 +145,7 @@ files:
237
145
  - lib/rosy/View.rb
238
146
  - lib/rosy/opt_parser.rb
239
147
  - lib/rosy/rosy.rb
148
+ - lib/rosy/rosy_config_data.rb
240
149
  - lib/shalmaneser/opt_parser.rb
241
150
  - lib/shalmaneser/version.rb
242
151
  homepage: http://bu.chsta.be/projects/shalmaneser/
@@ -246,7 +155,7 @@ metadata:
246
155
  issue_tracker: https://github.com/arbox/shalmaneser/issues
247
156
  post_install_message: |2+
248
157
 
249
- Thank you for installing Shalmaneser 1.2.0.rc3!
158
+ Thank you for installing Shalmaneser 1.2.0.rc4!
250
159
 
251
160
  This software package has multiple external dependencies:
252
161
  - OpenNLP Maximum Entropy Classifier;
@@ -263,24 +172,24 @@ post_install_message: |2+
263
172
  https://github.com/arbox/shalmaneser/issues
264
173
 
265
174
  rdoc_options:
266
- - -m
175
+ - "-m"
267
176
  - README.md
268
177
  require_paths:
269
178
  - lib
270
179
  required_ruby_version: !ruby/object:Gem::Requirement
271
180
  requirements:
272
- - - '>='
181
+ - - '='
273
182
  - !ruby/object:Gem::Version
274
- version: 1.8.7
183
+ version: '2.0'
275
184
  required_rubygems_version: !ruby/object:Gem::Requirement
276
185
  requirements:
277
- - - '>'
186
+ - - ">"
278
187
  - !ruby/object:Gem::Version
279
188
  version: 1.3.1
280
189
  requirements:
281
190
  - mysql-server
282
191
  rubyforge_project:
283
- rubygems_version: 2.2.0
192
+ rubygems_version: 2.4.5
284
193
  signing_key:
285
194
  specification_version: 4
286
195
  summary: SHALMANESER - SHALlow seMANtic parSER