shalmaneser 1.2.0.rc1 → 1.2.0.rc2

Sign up to get free protection for your applications and to get access to all the features.
Files changed (30) hide show
  1. checksums.yaml +4 -4
  2. data/README.md +26 -8
  3. data/doc/SB_README +57 -0
  4. data/doc/exp_files_description.txt +160 -0
  5. data/doc/fred.pdf +0 -0
  6. data/doc/index.md +120 -0
  7. data/doc/salsa_tool.pdf +0 -0
  8. data/doc/salsatigerxml.pdf +0 -0
  9. data/doc/shal_doc.pdf +0 -0
  10. data/doc/shal_lrec.pdf +0 -0
  11. data/lib/ext/maxent/Classify.class +0 -0
  12. data/lib/ext/maxent/Train.class +0 -0
  13. data/lib/frprep/TreetaggerInterface.rb +4 -4
  14. data/lib/shalmaneser/version.rb +1 -1
  15. metadata +41 -48
  16. data/test/frprep/test_opt_parser.rb +0 -94
  17. data/test/functional/functional_test_helper.rb +0 -40
  18. data/test/functional/sample_experiment_files/fred_test.salsa.erb +0 -122
  19. data/test/functional/sample_experiment_files/fred_train.salsa.erb +0 -135
  20. data/test/functional/sample_experiment_files/prp_test.salsa.erb +0 -138
  21. data/test/functional/sample_experiment_files/prp_test.salsa.fred.standalone.erb +0 -120
  22. data/test/functional/sample_experiment_files/prp_test.salsa.rosy.standalone.erb +0 -120
  23. data/test/functional/sample_experiment_files/prp_train.salsa.erb +0 -138
  24. data/test/functional/sample_experiment_files/prp_train.salsa.fred.standalone.erb +0 -138
  25. data/test/functional/sample_experiment_files/prp_train.salsa.rosy.standalone.erb +0 -138
  26. data/test/functional/sample_experiment_files/rosy_test.salsa.erb +0 -257
  27. data/test/functional/sample_experiment_files/rosy_train.salsa.erb +0 -259
  28. data/test/functional/test_fred.rb +0 -47
  29. data/test/functional/test_frprep.rb +0 -52
  30. data/test/functional/test_rosy.rb +0 -40
@@ -1,3 +1,3 @@
1
1
  module Shalmaneser
2
- VERSION = '1.2.0.rc1'
2
+ VERSION = '1.2.0.rc2'
3
3
  end
metadata CHANGED
@@ -1,83 +1,83 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: shalmaneser
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.2.0.rc1
4
+ version: 1.2.0.rc2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Andrei Beliankou
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2014-01-03 00:00:00.000000000 Z
11
+ date: 2014-01-06 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: mysql
15
15
  requirement: !ruby/object:Gem::Requirement
16
16
  requirements:
17
- - - '>='
17
+ - - ~>
18
18
  - !ruby/object:Gem::Version
19
19
  version: '0'
20
20
  type: :runtime
21
21
  prerelease: false
22
22
  version_requirements: !ruby/object:Gem::Requirement
23
23
  requirements:
24
- - - '>='
24
+ - - ~>
25
25
  - !ruby/object:Gem::Version
26
26
  version: '0'
27
27
  - !ruby/object:Gem::Dependency
28
28
  name: rdoc
29
29
  requirement: !ruby/object:Gem::Requirement
30
30
  requirements:
31
- - - '>='
31
+ - - ~>
32
32
  - !ruby/object:Gem::Version
33
- version: 3.9.1
33
+ version: '0'
34
34
  type: :development
35
35
  prerelease: false
36
36
  version_requirements: !ruby/object:Gem::Requirement
37
37
  requirements:
38
- - - '>='
38
+ - - ~>
39
39
  - !ruby/object:Gem::Version
40
- version: 3.9.1
40
+ version: '0'
41
41
  - !ruby/object:Gem::Dependency
42
42
  name: bundler
43
43
  requirement: !ruby/object:Gem::Requirement
44
44
  requirements:
45
- - - '>='
45
+ - - ~>
46
46
  - !ruby/object:Gem::Version
47
47
  version: '0'
48
48
  type: :development
49
49
  prerelease: false
50
50
  version_requirements: !ruby/object:Gem::Requirement
51
51
  requirements:
52
- - - '>='
52
+ - - ~>
53
53
  - !ruby/object:Gem::Version
54
54
  version: '0'
55
55
  - !ruby/object:Gem::Dependency
56
56
  name: yard
57
57
  requirement: !ruby/object:Gem::Requirement
58
58
  requirements:
59
- - - '>='
59
+ - - ~>
60
60
  - !ruby/object:Gem::Version
61
61
  version: '0'
62
62
  type: :development
63
63
  prerelease: false
64
64
  version_requirements: !ruby/object:Gem::Requirement
65
65
  requirements:
66
- - - '>='
66
+ - - ~>
67
67
  - !ruby/object:Gem::Version
68
68
  version: '0'
69
69
  - !ruby/object:Gem::Dependency
70
70
  name: rake
71
71
  requirement: !ruby/object:Gem::Requirement
72
72
  requirements:
73
- - - '>='
73
+ - - ~>
74
74
  - !ruby/object:Gem::Version
75
75
  version: '0'
76
76
  type: :development
77
77
  prerelease: false
78
78
  version_requirements: !ruby/object:Gem::Requirement
79
79
  requirements:
80
- - - '>='
80
+ - - ~>
81
81
  - !ruby/object:Gem::Version
82
82
  version: '0'
83
83
  description: |
@@ -98,6 +98,21 @@ extra_rdoc_files:
98
98
  - LICENSE.md
99
99
  - CHANGELOG.md
100
100
  files:
101
+ - .yardopts
102
+ - CHANGELOG.md
103
+ - LICENSE.md
104
+ - README.md
105
+ - bin/fred
106
+ - bin/frprep
107
+ - bin/rosy
108
+ - doc/SB_README
109
+ - doc/exp_files_description.txt
110
+ - doc/fred.pdf
111
+ - doc/index.md
112
+ - doc/salsa_tool.pdf
113
+ - doc/salsatigerxml.pdf
114
+ - doc/shal_doc.pdf
115
+ - doc/shal_lrec.pdf
101
116
  - lib/common/AbstractSynInterface.rb
102
117
  - lib/common/ConfigData.rb
103
118
  - lib/common/Counter.rb
@@ -128,6 +143,8 @@ files:
128
143
  - lib/common/headz.rb
129
144
  - lib/common/option_parser.rb
130
145
  - lib/common/ruby_class_extensions.rb
146
+ - lib/ext/maxent/Classify.class
147
+ - lib/ext/maxent/Train.class
131
148
  - lib/fred/Baseline.rb
132
149
  - lib/fred/FileZipped.rb
133
150
  - lib/fred/FredBOWContext.rb
@@ -201,35 +218,15 @@ files:
201
218
  - lib/rosy/opt_parser.rb
202
219
  - lib/rosy/rosy.rb
203
220
  - lib/shalmaneser/version.rb
204
- - README.md
205
- - LICENSE.md
206
- - CHANGELOG.md
207
- - .yardopts
208
- - test/frprep/test_opt_parser.rb
209
- - test/functional/functional_test_helper.rb
210
- - test/functional/test_fred.rb
211
- - test/functional/test_frprep.rb
212
- - test/functional/test_rosy.rb
213
- - test/functional/sample_experiment_files/fred_test.salsa.erb
214
- - test/functional/sample_experiment_files/fred_train.salsa.erb
215
- - test/functional/sample_experiment_files/prp_test.salsa.erb
216
- - test/functional/sample_experiment_files/prp_test.salsa.fred.standalone.erb
217
- - test/functional/sample_experiment_files/prp_test.salsa.rosy.standalone.erb
218
- - test/functional/sample_experiment_files/prp_train.salsa.erb
219
- - test/functional/sample_experiment_files/prp_train.salsa.fred.standalone.erb
220
- - test/functional/sample_experiment_files/prp_train.salsa.rosy.standalone.erb
221
- - test/functional/sample_experiment_files/rosy_test.salsa.erb
222
- - test/functional/sample_experiment_files/rosy_train.salsa.erb
223
- - bin/frprep
224
- - bin/fred
225
- - bin/rosy
226
221
  homepage: https://github.com/arbox/shalmaneser
227
222
  licenses:
228
223
  - GPL-2.0
229
- metadata: {}
224
+ metadata:
225
+ issue_tracker: https://github.com/arbox/shalmaneser/issues
226
+ homepage: http://bu.chsta.be/projects/shalmaneser/
230
227
  post_install_message: |2+
231
228
 
232
- Thank you for installing Shalmaneser 1.2.0.rc1!
229
+ Thank you for installing Shalmaneser 1.2.0.rc2!
233
230
 
234
231
  This software package has multiple external dependencies:
235
232
  - OpenNLP Maximum Entropy Classifier;
@@ -239,8 +236,8 @@ post_install_message: |2+
239
236
  - TreeTagger;
240
237
  - MySQL Database Server etc.
241
238
 
242
- Please proceede to installation instructions on our wiki:
243
- https://github.com/arbox/shalmaneser/wiki/Installation
239
+ Please proceed to installation instructions:
240
+ https://github.com/arbox/shalmaneser/blob/1.2/doc/index.md
244
241
 
245
242
  If you find any bugs or have questions consider opening a ticket:
246
243
  https://github.com/arbox/shalmaneser/issues
@@ -260,16 +257,12 @@ required_rubygems_version: !ruby/object:Gem::Requirement
260
257
  - - '>'
261
258
  - !ruby/object:Gem::Version
262
259
  version: 1.3.1
263
- requirements: []
260
+ requirements:
261
+ - mysql-server
264
262
  rubyforge_project:
265
- rubygems_version: 2.0.14
263
+ rubygems_version: 2.2.0
266
264
  signing_key:
267
265
  specification_version: 4
268
266
  summary: SHALMANESER - SHALlow seMANtic parSER
269
- test_files:
270
- - test/frprep/test_opt_parser.rb
271
- - test/functional/functional_test_helper.rb
272
- - test/functional/test_fred.rb
273
- - test/functional/test_frprep.rb
274
- - test/functional/test_rosy.rb
267
+ test_files: []
275
268
  has_rdoc:
@@ -1,94 +0,0 @@
1
- # -*- coding: utf-8 -*-
2
-
3
- require 'test/unit'
4
- require 'stringio' # for helper methods
5
- require 'frprep/opt_parser'
6
-
7
- include FrPrep
8
-
9
- class TestOptParser < Test::Unit::TestCase
10
-
11
- def setup
12
- @exp_file = 'test/frprep/data/prp_test.salsa'
13
- @valid_opts = ['--expfile', @exp_file,
14
- '--help'
15
- ]
16
- end
17
-
18
- def test_public_methods
19
- assert_respond_to(OptParser, :parse)
20
- end
21
-
22
- # It should return a FrPrepConfigData object.
23
- def test_parse_method
24
- input = ['-e', @exp_file]
25
- return_value = OptParser.parse(input)
26
- assert(return_value.instance_of?(FrPrepConfigData))
27
- end
28
-
29
- # It should reject the empty input and exit.
30
- def test_empty_input
31
- out, err = intercept_output do
32
- assert_raises(SystemExit) { OptParser.parse([]) }
33
- end
34
- assert_match(/You have to provide some options./, err)
35
- end
36
-
37
- # It should accept correct options.
38
- # Invalid options is the matter of OptionParser itself,
39
- # do not test it here.
40
- # We test only, that OP exits and does not raise an exception.
41
- def test_accept_correct_options
42
- # this options we should treat separately
43
- @valid_opts.delete('--help')
44
- assert_nothing_raised { OptParser.parse(@valid_opts) }
45
-
46
- stdout, stderr = intercept_output do
47
- assert_raises(SystemExit) { OptParser.parse(['--invalid-option']) }
48
- end
49
-
50
- assert_match(/You have provided an invalid option:/, stderr)
51
- end
52
-
53
- # It should successfully exit with some options.
54
- def test_successful_exit
55
- quietly do
56
- success_args = ['-h', '--help']
57
- success_args.each do |arg|
58
- assert_raises(SystemExit) { OptParser.parse(arg.split) }
59
- end
60
- end
61
- end
62
-
63
- end
64
- ################################################################################
65
- # It is a helper method, many testable units provide some verbose output
66
- # to stderr and/or stdout. It is useful to suppress any kind of verbosity.
67
- def quietly(&b)
68
- begin
69
- orig_stderr = $stderr.clone
70
- orig_stdout = $stdout.clone
71
- $stderr.reopen(File.new('/dev/null', 'w'))
72
- $stdout.reopen(File.new('/dev/null', 'w'))
73
- b.call
74
- ensure
75
- $stderr.reopen(orig_stderr)
76
- $stdout.reopen(orig_stdout)
77
- end
78
- end
79
-
80
- # It is a helper method for handling stdout and stderr as strings.
81
- def intercept_output
82
- orig_stdout = $stdout
83
- orig_stderr = $stderr
84
- $stdout = StringIO.new
85
- $stderr = StringIO.new
86
-
87
- yield
88
-
89
- return $stdout.string, $stderr.string
90
- ensure
91
- $stdout = orig_stdout
92
- $stderr = orig_stderr
93
- end
94
-
@@ -1,40 +0,0 @@
1
- require 'erb'
2
-
3
- module FunctionalTestHelper
4
- PREF = 'test/functional/sample_experiment_files'
5
-
6
- PRP_TEST_FILE = 'test/functional/sample_experiment_files/prp_test.salsa'
7
- PRP_TEST_FILE_FRED_STD = "#{PREF}/prp_test.salsa.fred.standalone"
8
- PRP_TEST_FILE_ROSY_STD = "#{PREF}/prp_test.salsa.rosy.standalone"
9
- PRP_TRAIN_FILE = 'test/functional/sample_experiment_files/prp_train.salsa'
10
- PRP_TRAIN_FILE_FRED_STD = "#{PREF}/prp_train.salsa.fred.standalone"
11
- PRP_TRAIN_FILE_ROSY_STD = "#{PREF}/prp_train.salsa.rosy.standalone"
12
-
13
- FRED_TEST_FILE = 'test/functional/sample_experiment_files/fred_test.salsa'
14
- FRED_TRAIN_FILE = 'test/functional/sample_experiment_files/fred_train.salsa'
15
- ROSY_TEST_FILE = 'test/functional/sample_experiment_files/rosy_test.salsa'
16
- ROSY_TRAIN_FILE = 'test/functional/sample_experiment_files/rosy_train.salsa'
17
-
18
- # Run an external process for functional testing and check the return code.
19
- # <system> returns <true> if the external code exposes no errors.
20
- # <@msg> is defined for every test object.
21
- def execute(cmd)
22
- status = system(cmd)
23
- assert(status, @msg)
24
- end
25
-
26
- # Create a temporary exp file only for this test.
27
- # Shalmaneser needs absolute paths, we provide them in exp files
28
- # using templating.
29
- def create_exp_file(file)
30
- template = File.read("#{file}.erb")
31
- text = ERB.new(template).result
32
- File.open(file, 'w') do |f|
33
- f.write(text)
34
- end
35
- end
36
-
37
- def remove_exp_file(file)
38
- File.delete(file)
39
- end
40
- end
@@ -1,122 +0,0 @@
1
- experiment_ID = fred_test
2
-
3
- apply_to_all_known_targets = true
4
-
5
- enduser_mode = false
6
-
7
- verbose = true
8
-
9
-
10
- ############################
11
- # Paths
12
- # - fred_directory: directory where Fred puts its internal data
13
- # - directory_output:
14
- # redirect system output of disambiguated text (in SalsaTigerXML)
15
- # to another directory.
16
- # If you do not set anything here, output is to
17
- # <fred_directory>/<experiment_ID>/output/stxml
18
- # - classifier_dir:
19
- # Write trained classifiers to this directory.
20
- # If you do not set this parameter, classifiers are written to
21
- # <fred_directory>/<experiment_ID>/classifiers
22
-
23
- fred_directory = <%= File.expand_path('test/functional/output') %>
24
- classifier_dir = <%= File.expand_path('test/functional/input/fred/cls') %>
25
- # - preproc_descr_file_train / ...test
26
- # where the experiment file for frprep is located
27
- # (preprocessing for Fred and Rosy)
28
- # for the preprocessing of the data used in this experiment
29
- #
30
- # give one preprocessing file name for the training data
31
- # and one for the test data
32
- # (If you only ever use test data in this experiment, you only
33
- # need to give preproc_descr_file_test, and vice versa for training data.)
34
-
35
- preproc_descr_file_test = <%= File.expand_path('test/functional/sample_experiment_files/prp_test.salsa.fred.standalone') %>
36
-
37
- #####################
38
- # noncontiguous input?
39
- # if so, set 'noncontiguous_input' to 'true' (default is 'false')
40
- # Also give the larger corpus from which the input sentences are:
41
- # - directory
42
- # - format: same possibilities as for frprep format
43
- # - encoding: same possibilities as for frprep encoding
44
-
45
- noncontiguous_input = false
46
- #larger_corpus_dir =
47
- larger_corpus_format = SalsaTigerXML
48
- #larger_corpus_encoding = iso
49
-
50
-
51
- #################
52
- # Features
53
-
54
- # bag-of-words context, with given context size,
55
- # for example:
56
- feature = context 50
57
- feature = context 2
58
- #
59
- # (you can give more than one context feature line!)
60
- #
61
- # other possible features:
62
- # feature = syntax
63
- # feature = synsem
64
- #
65
- # syntax: grammatical functions
66
- # synsem: grammatical functions plus headwords
67
-
68
- #feature = context % %contextsize%
69
- feature = syntax
70
-
71
- # How to handle training data that is labeled
72
- # with multiple sense labels?
73
- # - binarize (default): This works only with binary classifiers.
74
- # When featurizing for the binary classifiers, consider an item
75
- # positive if its set of assigned labels includes the
76
- # label for this binary classifier.
77
- # - repeat: Repeat the instance, once for each
78
- # sense label that has been assigned. (Basically, treat it
79
- # as N instances with equal features but different labels.)
80
- # - join: join all the assigned senses into one combined sense
81
- # and treat that as a separate sense to train on.
82
- # - keep: keep as multiple sense labels. (Note that this
83
- # makes sense only for classifiers that can deal with
84
- # multiple labels.)
85
-
86
- #handle_multilabel = binarize
87
- handle_multilabel = repeat
88
-
89
- # What to do with numerical features?
90
- # - keep: just leave as is
91
- # - repeat: for a feature with max. numerical value N,
92
- # use N binary features
93
- # - bin: use a fixed number of bins, e.g. 5, then
94
- # if feature value > 20: set all bins to 1,
95
- # if feature value > 10: set the first four bins to 1,
96
- # etc.
97
- # default: bin.
98
- #numerical_features = bin
99
- numerical_features = keep
100
-
101
- # Binary classifiers, or n-ary classifiers?
102
- # if binary classifiers, set 'binary_classifiers = true'
103
- # default is 'false'.
104
- binary_classifiers = false
105
-
106
- #################
107
- # Fred internal settings
108
-
109
- # what kind of classifier to use?
110
- #
111
- # format:
112
- # <classifier type> <path> <optionally another path>
113
- #
114
- # for maxent, give first the path where maxent resides,
115
- # then <where_shalmaneser_resides>/program/tools/maxent
116
- classifier = maxent <%= File.expand_path('tools/maxent/maxent-2.4.0') %>
117
-
118
-
119
- # for binary classifiers, you can set the pseudolabel
120
- # on the 'negative' sense.
121
- # Default is 'NONE'
122
- negsense = NONE