shalmaneser 1.2.0.rc1 → 1.2.0.rc2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +26 -8
- data/doc/SB_README +57 -0
- data/doc/exp_files_description.txt +160 -0
- data/doc/fred.pdf +0 -0
- data/doc/index.md +120 -0
- data/doc/salsa_tool.pdf +0 -0
- data/doc/salsatigerxml.pdf +0 -0
- data/doc/shal_doc.pdf +0 -0
- data/doc/shal_lrec.pdf +0 -0
- data/lib/ext/maxent/Classify.class +0 -0
- data/lib/ext/maxent/Train.class +0 -0
- data/lib/frprep/TreetaggerInterface.rb +4 -4
- data/lib/shalmaneser/version.rb +1 -1
- metadata +41 -48
- data/test/frprep/test_opt_parser.rb +0 -94
- data/test/functional/functional_test_helper.rb +0 -40
- data/test/functional/sample_experiment_files/fred_test.salsa.erb +0 -122
- data/test/functional/sample_experiment_files/fred_train.salsa.erb +0 -135
- data/test/functional/sample_experiment_files/prp_test.salsa.erb +0 -138
- data/test/functional/sample_experiment_files/prp_test.salsa.fred.standalone.erb +0 -120
- data/test/functional/sample_experiment_files/prp_test.salsa.rosy.standalone.erb +0 -120
- data/test/functional/sample_experiment_files/prp_train.salsa.erb +0 -138
- data/test/functional/sample_experiment_files/prp_train.salsa.fred.standalone.erb +0 -138
- data/test/functional/sample_experiment_files/prp_train.salsa.rosy.standalone.erb +0 -138
- data/test/functional/sample_experiment_files/rosy_test.salsa.erb +0 -257
- data/test/functional/sample_experiment_files/rosy_train.salsa.erb +0 -259
- data/test/functional/test_fred.rb +0 -47
- data/test/functional/test_frprep.rb +0 -52
- data/test/functional/test_rosy.rb +0 -40
data/lib/shalmaneser/version.rb
CHANGED
metadata
CHANGED
@@ -1,83 +1,83 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: shalmaneser
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.2.0.rc1
|
4
|
+
version: 1.2.0.rc2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Andrei Beliankou
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2014-01-
|
11
|
+
date: 2014-01-06 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: mysql
|
15
15
|
requirement: !ruby/object:Gem::Requirement
|
16
16
|
requirements:
|
17
|
-
- -
|
17
|
+
- - ~>
|
18
18
|
- !ruby/object:Gem::Version
|
19
19
|
version: '0'
|
20
20
|
type: :runtime
|
21
21
|
prerelease: false
|
22
22
|
version_requirements: !ruby/object:Gem::Requirement
|
23
23
|
requirements:
|
24
|
-
- -
|
24
|
+
- - ~>
|
25
25
|
- !ruby/object:Gem::Version
|
26
26
|
version: '0'
|
27
27
|
- !ruby/object:Gem::Dependency
|
28
28
|
name: rdoc
|
29
29
|
requirement: !ruby/object:Gem::Requirement
|
30
30
|
requirements:
|
31
|
-
- -
|
31
|
+
- - ~>
|
32
32
|
- !ruby/object:Gem::Version
|
33
|
-
version:
|
33
|
+
version: '0'
|
34
34
|
type: :development
|
35
35
|
prerelease: false
|
36
36
|
version_requirements: !ruby/object:Gem::Requirement
|
37
37
|
requirements:
|
38
|
-
- -
|
38
|
+
- - ~>
|
39
39
|
- !ruby/object:Gem::Version
|
40
|
-
version:
|
40
|
+
version: '0'
|
41
41
|
- !ruby/object:Gem::Dependency
|
42
42
|
name: bundler
|
43
43
|
requirement: !ruby/object:Gem::Requirement
|
44
44
|
requirements:
|
45
|
-
- -
|
45
|
+
- - ~>
|
46
46
|
- !ruby/object:Gem::Version
|
47
47
|
version: '0'
|
48
48
|
type: :development
|
49
49
|
prerelease: false
|
50
50
|
version_requirements: !ruby/object:Gem::Requirement
|
51
51
|
requirements:
|
52
|
-
- -
|
52
|
+
- - ~>
|
53
53
|
- !ruby/object:Gem::Version
|
54
54
|
version: '0'
|
55
55
|
- !ruby/object:Gem::Dependency
|
56
56
|
name: yard
|
57
57
|
requirement: !ruby/object:Gem::Requirement
|
58
58
|
requirements:
|
59
|
-
- -
|
59
|
+
- - ~>
|
60
60
|
- !ruby/object:Gem::Version
|
61
61
|
version: '0'
|
62
62
|
type: :development
|
63
63
|
prerelease: false
|
64
64
|
version_requirements: !ruby/object:Gem::Requirement
|
65
65
|
requirements:
|
66
|
-
- -
|
66
|
+
- - ~>
|
67
67
|
- !ruby/object:Gem::Version
|
68
68
|
version: '0'
|
69
69
|
- !ruby/object:Gem::Dependency
|
70
70
|
name: rake
|
71
71
|
requirement: !ruby/object:Gem::Requirement
|
72
72
|
requirements:
|
73
|
-
- -
|
73
|
+
- - ~>
|
74
74
|
- !ruby/object:Gem::Version
|
75
75
|
version: '0'
|
76
76
|
type: :development
|
77
77
|
prerelease: false
|
78
78
|
version_requirements: !ruby/object:Gem::Requirement
|
79
79
|
requirements:
|
80
|
-
- -
|
80
|
+
- - ~>
|
81
81
|
- !ruby/object:Gem::Version
|
82
82
|
version: '0'
|
83
83
|
description: |
|
@@ -98,6 +98,21 @@ extra_rdoc_files:
|
|
98
98
|
- LICENSE.md
|
99
99
|
- CHANGELOG.md
|
100
100
|
files:
|
101
|
+
- .yardopts
|
102
|
+
- CHANGELOG.md
|
103
|
+
- LICENSE.md
|
104
|
+
- README.md
|
105
|
+
- bin/fred
|
106
|
+
- bin/frprep
|
107
|
+
- bin/rosy
|
108
|
+
- doc/SB_README
|
109
|
+
- doc/exp_files_description.txt
|
110
|
+
- doc/fred.pdf
|
111
|
+
- doc/index.md
|
112
|
+
- doc/salsa_tool.pdf
|
113
|
+
- doc/salsatigerxml.pdf
|
114
|
+
- doc/shal_doc.pdf
|
115
|
+
- doc/shal_lrec.pdf
|
101
116
|
- lib/common/AbstractSynInterface.rb
|
102
117
|
- lib/common/ConfigData.rb
|
103
118
|
- lib/common/Counter.rb
|
@@ -128,6 +143,8 @@ files:
|
|
128
143
|
- lib/common/headz.rb
|
129
144
|
- lib/common/option_parser.rb
|
130
145
|
- lib/common/ruby_class_extensions.rb
|
146
|
+
- lib/ext/maxent/Classify.class
|
147
|
+
- lib/ext/maxent/Train.class
|
131
148
|
- lib/fred/Baseline.rb
|
132
149
|
- lib/fred/FileZipped.rb
|
133
150
|
- lib/fred/FredBOWContext.rb
|
@@ -201,35 +218,15 @@ files:
|
|
201
218
|
- lib/rosy/opt_parser.rb
|
202
219
|
- lib/rosy/rosy.rb
|
203
220
|
- lib/shalmaneser/version.rb
|
204
|
-
- README.md
|
205
|
-
- LICENSE.md
|
206
|
-
- CHANGELOG.md
|
207
|
-
- .yardopts
|
208
|
-
- test/frprep/test_opt_parser.rb
|
209
|
-
- test/functional/functional_test_helper.rb
|
210
|
-
- test/functional/test_fred.rb
|
211
|
-
- test/functional/test_frprep.rb
|
212
|
-
- test/functional/test_rosy.rb
|
213
|
-
- test/functional/sample_experiment_files/fred_test.salsa.erb
|
214
|
-
- test/functional/sample_experiment_files/fred_train.salsa.erb
|
215
|
-
- test/functional/sample_experiment_files/prp_test.salsa.erb
|
216
|
-
- test/functional/sample_experiment_files/prp_test.salsa.fred.standalone.erb
|
217
|
-
- test/functional/sample_experiment_files/prp_test.salsa.rosy.standalone.erb
|
218
|
-
- test/functional/sample_experiment_files/prp_train.salsa.erb
|
219
|
-
- test/functional/sample_experiment_files/prp_train.salsa.fred.standalone.erb
|
220
|
-
- test/functional/sample_experiment_files/prp_train.salsa.rosy.standalone.erb
|
221
|
-
- test/functional/sample_experiment_files/rosy_test.salsa.erb
|
222
|
-
- test/functional/sample_experiment_files/rosy_train.salsa.erb
|
223
|
-
- bin/frprep
|
224
|
-
- bin/fred
|
225
|
-
- bin/rosy
|
226
221
|
homepage: https://github.com/arbox/shalmaneser
|
227
222
|
licenses:
|
228
223
|
- GPL-2.0
|
229
|
-
metadata:
|
224
|
+
metadata:
|
225
|
+
issue_tracker: https://github.com/arbox/shalmaneser/issues
|
226
|
+
homepage: http://bu.chsta.be/projects/shalmaneser/
|
230
227
|
post_install_message: |2+
|
231
228
|
|
232
|
-
Thank you for installing Shalmaneser 1.2.0.rc1!
|
229
|
+
Thank you for installing Shalmaneser 1.2.0.rc2!
|
233
230
|
|
234
231
|
This software package has multiple external dependencies:
|
235
232
|
- OpenNLP Maximum Entropy Classifier;
|
@@ -239,8 +236,8 @@ post_install_message: |2+
|
|
239
236
|
- TreeTagger;
|
240
237
|
- MySQL Database Server etc.
|
241
238
|
|
242
|
-
Please proceede to installation instructions
|
243
|
-
https://github.com/arbox/shalmaneser/
|
239
|
+
Please proceede to installation instructions:
|
240
|
+
https://github.com/arbox/shalmaneser/blob/1.2/doc/index.md
|
244
241
|
|
245
242
|
If you find any bugs or have questions consider opeing a ticket:
|
246
243
|
https://github.com/arbox/shalmaneser/issues
|
@@ -260,16 +257,12 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
260
257
|
- - '>'
|
261
258
|
- !ruby/object:Gem::Version
|
262
259
|
version: 1.3.1
|
263
|
-
requirements:
|
260
|
+
requirements:
|
261
|
+
- mysql-server
|
264
262
|
rubyforge_project:
|
265
|
-
rubygems_version: 2.0
|
263
|
+
rubygems_version: 2.2.0
|
266
264
|
signing_key:
|
267
265
|
specification_version: 4
|
268
266
|
summary: SHALMANESER - SHALlow seMANtic parSER
|
269
|
-
test_files:
|
270
|
-
- test/frprep/test_opt_parser.rb
|
271
|
-
- test/functional/functional_test_helper.rb
|
272
|
-
- test/functional/test_fred.rb
|
273
|
-
- test/functional/test_frprep.rb
|
274
|
-
- test/functional/test_rosy.rb
|
267
|
+
test_files: []
|
275
268
|
has_rdoc:
|
@@ -1,94 +0,0 @@
|
|
1
|
-
# -*- coding: utf-8 -*-
|
2
|
-
|
3
|
-
require 'test/unit'
|
4
|
-
require 'stringio' # for helper methods
|
5
|
-
require 'frprep/opt_parser'
|
6
|
-
|
7
|
-
include FrPrep
|
8
|
-
|
9
|
-
class TestOptParser < Test::Unit::TestCase
|
10
|
-
|
11
|
-
def setup
|
12
|
-
@exp_file = 'test/frprep/data/prp_test.salsa'
|
13
|
-
@valid_opts = ['--expfile', @exp_file,
|
14
|
-
'--help'
|
15
|
-
]
|
16
|
-
end
|
17
|
-
|
18
|
-
def test_public_methods
|
19
|
-
assert_respond_to(OptParser, :parse)
|
20
|
-
end
|
21
|
-
|
22
|
-
# It should return a FrPrepConfigData object.
|
23
|
-
def test_parse_method
|
24
|
-
input = ['-e', @exp_file]
|
25
|
-
return_value = OptParser.parse(input)
|
26
|
-
assert(return_value.instance_of?(FrPrepConfigData))
|
27
|
-
end
|
28
|
-
|
29
|
-
# It should reject the empty input and exit.
|
30
|
-
def test_empty_input
|
31
|
-
out, err = intercept_output do
|
32
|
-
assert_raises(SystemExit) { OptParser.parse([]) }
|
33
|
-
end
|
34
|
-
assert_match(/You have to provide some options./, err)
|
35
|
-
end
|
36
|
-
|
37
|
-
# It should accept correct options.
|
38
|
-
# Invalid options is the matter of OptionParser itself,
|
39
|
-
# do not test it here.
|
40
|
-
# We test only, that OP exits and does not raise an exception.
|
41
|
-
def test_accept_correct_options
|
42
|
-
# this options we should treat separately
|
43
|
-
@valid_opts.delete('--help')
|
44
|
-
assert_nothing_raised { OptParser.parse(@valid_opts) }
|
45
|
-
|
46
|
-
stdout, stderr = intercept_output do
|
47
|
-
assert_raises(SystemExit) { OptParser.parse(['--invalid-option']) }
|
48
|
-
end
|
49
|
-
|
50
|
-
assert_match(/You have provided an invalid option:/, stderr)
|
51
|
-
end
|
52
|
-
|
53
|
-
# It should successfully exit with some options.
|
54
|
-
def test_successful_exit
|
55
|
-
quietly do
|
56
|
-
success_args = ['-h', '--help']
|
57
|
-
success_args.each do |arg|
|
58
|
-
assert_raises(SystemExit) { OptParser.parse(arg.split) }
|
59
|
-
end
|
60
|
-
end
|
61
|
-
end
|
62
|
-
|
63
|
-
end
|
64
|
-
################################################################################
|
65
|
-
# It is a helper method, many testable units provide some verbose output
|
66
|
-
# to stderr and/or stdout. It is useful to suppress any kind of verbosity.
|
67
|
-
def quietly(&b)
|
68
|
-
begin
|
69
|
-
orig_stderr = $stderr.clone
|
70
|
-
orig_stdout = $stdout.clone
|
71
|
-
$stderr.reopen(File.new('/dev/null', 'w'))
|
72
|
-
$stdout.reopen(File.new('/dev/null', 'w'))
|
73
|
-
b.call
|
74
|
-
ensure
|
75
|
-
$stderr.reopen(orig_stderr)
|
76
|
-
$stdout.reopen(orig_stdout)
|
77
|
-
end
|
78
|
-
end
|
79
|
-
|
80
|
-
# It is a helper method for handling stdout and stderr as strings.
|
81
|
-
def intercept_output
|
82
|
-
orig_stdout = $stdout
|
83
|
-
orig_stderr = $stderr
|
84
|
-
$stdout = StringIO.new
|
85
|
-
$stderr = StringIO.new
|
86
|
-
|
87
|
-
yield
|
88
|
-
|
89
|
-
return $stdout.string, $stderr.string
|
90
|
-
ensure
|
91
|
-
$stdout = orig_stdout
|
92
|
-
$stderr = orig_stderr
|
93
|
-
end
|
94
|
-
|
@@ -1,40 +0,0 @@
|
|
1
|
-
require 'erb'
|
2
|
-
|
3
|
-
module FunctionalTestHelper
|
4
|
-
PREF = 'test/functional/sample_experiment_files'
|
5
|
-
|
6
|
-
PRP_TEST_FILE = 'test/functional/sample_experiment_files/prp_test.salsa'
|
7
|
-
PRP_TEST_FILE_FRED_STD = "#{PREF}/prp_test.salsa.fred.standalone"
|
8
|
-
PRP_TEST_FILE_ROSY_STD = "#{PREF}/prp_test.salsa.rosy.standalone"
|
9
|
-
PRP_TRAIN_FILE = 'test/functional/sample_experiment_files/prp_train.salsa'
|
10
|
-
PRP_TRAIN_FILE_FRED_STD = "#{PREF}/prp_train.salsa.fred.standalone"
|
11
|
-
PRP_TRAIN_FILE_ROSY_STD = "#{PREF}/prp_train.salsa.rosy.standalone"
|
12
|
-
|
13
|
-
FRED_TEST_FILE = 'test/functional/sample_experiment_files/fred_test.salsa'
|
14
|
-
FRED_TRAIN_FILE = 'test/functional/sample_experiment_files/fred_train.salsa'
|
15
|
-
ROSY_TEST_FILE = 'test/functional/sample_experiment_files/rosy_test.salsa'
|
16
|
-
ROSY_TRAIN_FILE = 'test/functional/sample_experiment_files/rosy_train.salsa'
|
17
|
-
|
18
|
-
# Run an external process for functional testing and check the return code.
|
19
|
-
# <system> returns <true> if the external code exposes no errors.
|
20
|
-
# <@msg> is defined for every test object.
|
21
|
-
def execute(cmd)
|
22
|
-
status = system(cmd)
|
23
|
-
assert(status, @msg)
|
24
|
-
end
|
25
|
-
|
26
|
-
# Create a temporary exp file only for this test.
|
27
|
-
# Shalmaneser needs absolute paths, we provide them in exp files
|
28
|
-
# using templating.
|
29
|
-
def create_exp_file(file)
|
30
|
-
template = File.read("#{file}.erb")
|
31
|
-
text = ERB.new(template).result
|
32
|
-
File.open(file, 'w') do |f|
|
33
|
-
f.write(text)
|
34
|
-
end
|
35
|
-
end
|
36
|
-
|
37
|
-
def remove_exp_file(file)
|
38
|
-
File.delete(file)
|
39
|
-
end
|
40
|
-
end
|
@@ -1,122 +0,0 @@
|
|
1
|
-
experiment_ID = fred_test
|
2
|
-
|
3
|
-
apply_to_all_known_targets = true
|
4
|
-
|
5
|
-
enduser_mode = false
|
6
|
-
|
7
|
-
verbose = true
|
8
|
-
|
9
|
-
|
10
|
-
############################
|
11
|
-
# Paths
|
12
|
-
# - fred_directory: directory where Fred puts its internal data
|
13
|
-
# - directory_output:
|
14
|
-
# redirect system output of disambiguated text (in SalsaTigerXML)
|
15
|
-
# to another directory.
|
16
|
-
# If you do not set anything here, output is to
|
17
|
-
# <fred_directory>/<experiment_ID>/output/stxml
|
18
|
-
# - classifier_dir:
|
19
|
-
# Write trained classifiers to this directory.
|
20
|
-
# If you do not set this parameter, classifiers are written to
|
21
|
-
# <fred_directory>/<experiment_ID>/classifiers
|
22
|
-
|
23
|
-
fred_directory = <%= File.expand_path('test/functional/output') %>
|
24
|
-
classifier_dir = <%= File.expand_path('test/functional/input/fred/cls') %>
|
25
|
-
# - preproc_descr_file_train / ...test
|
26
|
-
# where the experiment file for frprep is located
|
27
|
-
# (preprocessing for Fred and Rosy)
|
28
|
-
# for the preprocessing of the data used in this experiment
|
29
|
-
#
|
30
|
-
# give one preprocessing file name for the training data
|
31
|
-
# and one for the test data
|
32
|
-
# (If you only ever use test data in this experiment, you only
|
33
|
-
# need to give preproc_descr_file_test, and vice versa for training data.)
|
34
|
-
|
35
|
-
preproc_descr_file_test = <%= File.expand_path('test/functional/sample_experiment_files/prp_test.salsa.fred.standalone') %>
|
36
|
-
|
37
|
-
#####################
|
38
|
-
# noncontiguous input?
|
39
|
-
# if so, set 'noncontiguous_input' to 'true' (default is 'false')
|
40
|
-
# Also give the larger corpus from which the input sentences are:
|
41
|
-
# - directory
|
42
|
-
# - format: same possibilities as for frprep format
|
43
|
-
# - encoding: same possibilities as for frprep encoding
|
44
|
-
|
45
|
-
noncontiguous_input = false
|
46
|
-
#larger_corpus_dir =
|
47
|
-
larger_corpus_format = SalsaTigerXML
|
48
|
-
#larger_corpus_encoding = iso
|
49
|
-
|
50
|
-
|
51
|
-
#################
|
52
|
-
# Features
|
53
|
-
|
54
|
-
# bag-of-words context, with given context size,
|
55
|
-
# for example:
|
56
|
-
feature = context 50
|
57
|
-
feature = context 2
|
58
|
-
#
|
59
|
-
# (you can give more than one context feature line!)
|
60
|
-
#
|
61
|
-
# other possible features:
|
62
|
-
# feature = syntax
|
63
|
-
# feature = synsem
|
64
|
-
#
|
65
|
-
# syntax: grammatical functions
|
66
|
-
# synsem: grammatical functions plus headwords
|
67
|
-
|
68
|
-
#feature = context % %contextsize%
|
69
|
-
feature = syntax
|
70
|
-
|
71
|
-
# How to handle training data that is labeled
|
72
|
-
# with multiple sense labels?
|
73
|
-
# - binarize (default): This works only with binary classifiers.
|
74
|
-
# When featurizing for the binary classifiers, consider an item
|
75
|
-
# positive if its set of assigned labels includes the
|
76
|
-
# label for this binary classifier.
|
77
|
-
# - repeat: Repeat the instance, once for each
|
78
|
-
# sense label that has been assigned. (Basically, treat it
|
79
|
-
# as N instances with equal features but different labels.)
|
80
|
-
# - join: join all the assigned senses into one combined sense
|
81
|
-
# and treat that as a separate sense to train on.
|
82
|
-
# - keep: keep as multiple sense labels. (Note that this
|
83
|
-
# makes sense only for classifiers that can deal with
|
84
|
-
# multiple labels.)
|
85
|
-
|
86
|
-
#handle_multilabel = binarize
|
87
|
-
handle_multilabel = repeat
|
88
|
-
|
89
|
-
# What to do with numerical features?
|
90
|
-
# - keep: just leave as is
|
91
|
-
# - repeat: for a feature with max. numerical value N,
|
92
|
-
# use N binary features
|
93
|
-
# - bin: use a fixed number of bins, e.g. 5, then
|
94
|
-
# if feature value > 20: set all bins to 1,
|
95
|
-
# if feature value > 10: set the first four bins to 1,
|
96
|
-
# etc.
|
97
|
-
# default: bin.
|
98
|
-
#numerical_features = bin
|
99
|
-
numerical_features = keep
|
100
|
-
|
101
|
-
# Binary classifiers, or n-ary classifiers?
|
102
|
-
# if binary classifiers, set 'binary_classifiers = true'
|
103
|
-
# default is 'false'.
|
104
|
-
binary_classifiers = false
|
105
|
-
|
106
|
-
#################
|
107
|
-
# Fred internal settings
|
108
|
-
|
109
|
-
# what kind of classifier to use?
|
110
|
-
#
|
111
|
-
# format:
|
112
|
-
# <classifier type> <path> <optionally another path>
|
113
|
-
#
|
114
|
-
# for maxent, give first the path where maxent resides,
|
115
|
-
# then <where_shalmaneser_resides>/program/tools/maxent
|
116
|
-
classifier = maxent <%= File.expand_path('tools/maxent/maxent-2.4.0') %>
|
117
|
-
|
118
|
-
|
119
|
-
# for binary classifiers, you can set the pseudolabel
|
120
|
-
# on the 'negative' sense.
|
121
|
-
# Default is 'NONE'
|
122
|
-
negsense = NONE
|