shalmaneser-prep 1.2.0.rc4
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.yardopts +10 -0
- data/CHANGELOG.md +4 -0
- data/LICENSE.md +4 -0
- data/README.md +93 -0
- data/lib/frprep/Ampersand.rb +39 -0
- data/lib/frprep/CollinsInterface.rb +1165 -0
- data/lib/frprep/Counter.rb +18 -0
- data/lib/frprep/FNCorpusXML.rb +643 -0
- data/lib/frprep/FNDatabase.rb +144 -0
- data/lib/frprep/FrameXML.rb +513 -0
- data/lib/frprep/Graph.rb +345 -0
- data/lib/frprep/MiniparInterface.rb +1388 -0
- data/lib/frprep/RegXML.rb +269 -0
- data/lib/frprep/STXmlTerminalOrder.rb +194 -0
- data/lib/frprep/SleepyInterface.rb +384 -0
- data/lib/frprep/TntInterface.rb +44 -0
- data/lib/frprep/TreetaggerInterface.rb +327 -0
- data/lib/frprep/do_parses.rb +143 -0
- data/lib/frprep/frprep.rb +693 -0
- data/lib/frprep/interfaces/berkeley_interface.rb +372 -0
- data/lib/frprep/interfaces/stanford_interface.rb +353 -0
- data/lib/frprep/interpreters/berkeley_interpreter.rb +22 -0
- data/lib/frprep/interpreters/stanford_interpreter.rb +22 -0
- data/lib/frprep/one_parsed_file.rb +28 -0
- data/lib/frprep/opt_parser.rb +94 -0
- data/lib/frprep/ruby_class_extensions.rb +310 -0
- data/test/frprep/test_opt_parser.rb +94 -0
- data/test/functional/functional_test_helper.rb +58 -0
- data/test/functional/test_fred.rb +47 -0
- data/test/functional/test_frprep.rb +99 -0
- data/test/functional/test_rosy.rb +40 -0
- metadata +85 -0
@@ -0,0 +1,94 @@
|
|
1
|
+
# -*- coding: utf-8 -*-
|
2
|
+
|
3
|
+
require 'test/unit'
|
4
|
+
require 'stringio' # for helper methods
|
5
|
+
require 'frprep/opt_parser'
|
6
|
+
|
7
|
+
include FrPrep
|
8
|
+
|
9
|
+
# Unit tests for OptParser (loaded from 'frprep/opt_parser').
# Relies on the file-level helpers +intercept_output+ and +quietly+.
class TestOptParser < Test::Unit::TestCase
  def setup
    @exp_file = 'test/frprep/data/prp_test.salsa'
    @valid_opts = ['--expfile', @exp_file,
                   '--help']
  end

  def test_public_methods
    assert_respond_to(OptParser, :parse)
  end

  # It should return a FrPrepConfigData object.
  def test_parse_method
    input = ['-e', @exp_file]
    return_value = OptParser.parse(input)
    # assert_instance_of reports the actual class on failure instead of
    # a bare "false is not true".
    assert_instance_of(FrPrepConfigData, return_value)
  end

  # It should reject the empty input and exit.
  def test_empty_input
    _out, err = intercept_output do
      assert_raises(SystemExit) { OptParser.parse([]) }
    end
    # The trailing dot is escaped so it only matches a literal full stop.
    assert_match(/You have to provide some options\./, err)
  end

  # It should accept correct options.
  # Invalid options are the matter of OptionParser itself,
  # we do not test them here.
  # We only test that OP exits and does not raise an exception.
  def test_accept_correct_options
    # '--help' is treated separately (see test_successful_exit).
    # Build a new list instead of mutating the fixture in place.
    opts = @valid_opts - ['--help']
    assert_nothing_raised { OptParser.parse(opts) }

    _stdout, stderr = intercept_output do
      assert_raises(SystemExit) { OptParser.parse(['--invalid-option']) }
    end

    assert_match(/You have provided an invalid option:/, stderr)
  end

  # It should successfully exit with some options.
  def test_successful_exit
    quietly do
      %w[-h --help].each do |arg|
        assert_raises(SystemExit) { OptParser.parse([arg]) }
      end
    end
  end
end
|
64
|
+
################################################################################
|
65
|
+
# Helper method: many testable units print verbose output to stderr
# and/or stdout. It is useful to suppress any kind of verbosity.
#
# Runs the given block with $stdout and $stderr redirected to the null
# device and restores both streams afterwards, even if the block raises.
#
# @param b [Proc] the block to run silently
# @return [Object] the value returned by the block
def quietly(&b)
  # Duplicates of the current streams, needed to restore them later.
  saved_stderr = $stderr.clone
  saved_stdout = $stdout.clone

  # File::NULL is the platform's null device. The block form closes the
  # temporary handle once both streams have been re-pointed at it.
  File.open(File::NULL, 'w') do |null|
    $stderr.reopen(null)
    $stdout.reopen(null)
  end

  b.call
ensure
  # Restore only what was actually saved, then close the duplicates so
  # repeated calls do not leak file descriptors.
  if saved_stderr
    $stderr.reopen(saved_stderr)
    saved_stderr.close
  end
  if saved_stdout
    $stdout.reopen(saved_stdout)
    saved_stdout.close
  end
end
|
79
|
+
|
80
|
+
# Helper method: captures everything written to $stdout and $stderr
# while the given block runs and hands it back as strings.
#
# @return [Array<String>] the captured stdout text and stderr text
def intercept_output
  saved_out = $stdout
  saved_err = $stderr
  $stdout = StringIO.new
  $stderr = StringIO.new

  yield

  [$stdout.string, $stderr.string]
ensure
  # Put the real streams back, whether or not the block raised.
  $stdout = saved_out
  $stderr = saved_err
end
|
94
|
+
|
@@ -0,0 +1,58 @@
|
|
1
|
+
require 'erb'
|
2
|
+
|
3
|
+
|
4
|
+
# Setting $DEBUG will produce all external output.
|
5
|
+
# Otherwise it is suppressed.
|
6
|
+
module FunctionalTestHelper
  # Directory holding the sample experiment files (relative path).
  PREF = 'test/functional/sample_experiment_files'

  PRP_TEST_FILE = "#{PREF}/prp_test.salsa"
  PRP_TEST_FILE_FRED_STD = "#{PREF}/prp_test.salsa.fred.standalone"
  PRP_TEST_FILE_ROSY_STD = "#{PREF}/prp_test.salsa.rosy.standalone"
  PRP_TRAIN_FILE = "#{PREF}/prp_train.salsa"
  PRP_TRAIN_FILE_FRED_STD = "#{PREF}/prp_train.salsa.fred.standalone"
  PRP_TRAIN_FILE_ROSY_STD = "#{PREF}/prp_train.salsa.rosy.standalone"

  # Built from PREF like the other paths; the resulting values are unchanged.
  FRED_TEST_FILE = "#{PREF}/fred_test.salsa"
  FRED_TRAIN_FILE = "#{PREF}/fred_train.salsa"
  ROSY_TEST_FILE = "#{PREF}/rosy_test.salsa"
  ROSY_TRAIN_FILE = "#{PREF}/rosy_train.salsa"

  # Testing input for Preprocessor.
  PRP_PLAININPUT = "#{PREF}/prp_plaininput"
  PRP_STXMLINPUT = "#{PREF}/prp_stxmlinput"
  PRP_TABINPUT = "#{PREF}/prp_tabinput"
  PRP_FNXMLINPUT = "#{PREF}/prp_fnxmlinput"
  PRP_FNCORPUSXMLINPUT = "#{PREF}/prp_fncorpusxmlinput"

  # Testing output for Preprocessor.
  PRP_STXMLOUTPUT = "#{PREF}/prp_stxmloutput"
  PRP_TABOUTPUT = "#{PREF}/prp_taboutput"

  # Run an external process for functional testing and check the return code.
  # <system> returns <true> if the external code exposes no errors.
  # <@msg> is defined for every test object.
  #
  # Unless $DEBUG is set, stdout and stderr of the process are sent to the
  # null device. Redirection options are used instead of appending a shell
  # redirection to the command string, so the whole command is silenced and
  # no '/dev/null' path is hard-coded.
  #
  # @param cmd [String]
  def execute(cmd)
    status = if $DEBUG
               system(cmd)
             else
               system(cmd, out: File::NULL, err: File::NULL)
             end
    assert(status, @msg)
  end

  # Create a temporary exp file only for this test.
  # Shalmaneser needs absolute paths, we provide them in exp files
  # using templating.
  #
  # Reads "<file>.erb", renders it with ERB and writes the result to <file>.
  #
  # @param file [String] path of the exp file to generate
  def create_exp_file(file)
    template = File.read("#{file}.erb")
    File.write(file, ERB.new(template).result)
  end

  # Delete a generated exp file. A file that is already gone is ignored,
  # so the method is safe to call from cleanup code.
  #
  # @param file [String] path of the exp file to delete
  def remove_exp_file(file)
    File.delete(file) if File.exist?(file)
  end
end
|
@@ -0,0 +1,47 @@
|
|
1
|
+
# -*- encoding: utf-8 -*-
|
2
|
+
|
3
|
+
require 'test/unit'
|
4
|
+
require 'functional/functional_test_helper'
|
5
|
+
|
6
|
+
# Functional tests that run bin/fred as an external process.
class TestFred < Test::Unit::TestCase
  include FunctionalTestHelper

  def setup
    @msg = "Fred is doing bad, you've just broken something!"
    @test_file = FRED_TEST_FILE
    @train_file = FRED_TRAIN_FILE
  end

  def test_fred_testing_featurization
    with_exp_files(@test_file, PRP_TEST_FILE_FRED_STD) do
      execute("ruby -I lib bin/fred -t featurize -e #{@test_file} -d test")
    end
  end

  def test_fred_testing_tests
    with_exp_files(@test_file, PRP_TEST_FILE_FRED_STD) do
      execute("ruby -I lib bin/fred -t test -e #{@test_file}")
    end
  end

  def test_fred_training_featurization
    with_exp_files(@train_file, PRP_TRAIN_FILE_FRED_STD) do
      execute("ruby -I lib bin/fred -t featurize -e #{@train_file} -d train")
    end
  end

  def test_fred_training_train
    with_exp_files(@train_file, PRP_TRAIN_FILE_FRED_STD) do
      execute("ruby -I lib bin/fred -t train -e #{@train_file}")
    end
  end

  private

  # Generate the given exp files, run the block, then delete the files
  # that were generated. The cleanup sits in +ensure+ so a failing
  # assertion inside the block no longer leaves generated files behind.
  #
  # @param files [Array<String>] exp file paths, created in the given order
  def with_exp_files(*files)
    created = []
    files.each do |file|
      create_exp_file(file)
      created << file
    end
    yield
  ensure
    # Only files whose creation succeeded are removed.
    created.each { |file| remove_exp_file(file) }
  end
end
|
@@ -0,0 +1,99 @@
|
|
1
|
+
# -*- encoding: utf-8 -*-
|
2
|
+
|
3
|
+
require 'test/unit'
|
4
|
+
require 'functional/functional_test_helper'
|
5
|
+
#require 'fileutils' # File.delete(), File.rename(), File.symlink()
|
6
|
+
|
7
|
+
# Functional tests that run bin/frprep as an external process.
class TestFrprep < Test::Unit::TestCase
  include FunctionalTestHelper

  def setup
    @msg = "FrPrep is doing bad, you've just broken something!"
    @test_file = PRP_TEST_FILE
    @train_file = PRP_TRAIN_FILE
    @ptb = 'lib/frprep/interfaces/berkeley_interface.rb'
    #link_berkeley
    ENV['SHALM_BERKELEY_MODEL'] = 'sc_dash_labeled_1_smoothing.gr'
  end

  def teardown
    #unlink_berkeley
  end

  def test_frprep_testing
    run_frprep(@test_file)
  end

  def test_frprep_training
    run_frprep(@train_file)
  end

  # Testing input in different formats.
  def test_frprep_plaininput
    run_frprep(PRP_PLAININPUT)
  end

  def test_frprep_stxmlinput
    run_frprep(PRP_STXMLINPUT)
  end

  def test_frprep_tabinput
    run_frprep(PRP_TABINPUT)
  end

  def test_frprep_fncorpusxmlinput
    run_frprep(PRP_FNCORPUSXMLINPUT)
  end

  def test_frprep_fnxmlinput
    run_frprep(PRP_FNXMLINPUT)
  end

  # Testing output in different formats.
  # We test only on German input assuming English input to work.
  def test_frprep_stxmloutput
    run_frprep(PRP_STXMLOUTPUT)
  end

  def test_frprep_taboutput
    run_frprep(PRP_TABOUTPUT)
  end

  private

  # Generate the exp file, run frprep on it and delete the exp file again.
  # The deletion sits in +ensure+ so a failing run no longer leaves the
  # generated file behind.
  #
  # @param exp_file [String] path of the exp file to generate and use
  def run_frprep(exp_file)
    create_exp_file(exp_file)
    begin
      execute("ruby -I lib bin/frprep -e #{exp_file}")
    ensure
      remove_exp_file(exp_file)
    end
  end

  # Berkeley Parser takes a long time which is bad for testing.
  # We ran it once and reuse the result file in our tests.
  # Before every test we link the Berkeley interface to a stub
  # with the BP invocation switched off.
  # NOTE: the call in #setup is currently commented out.
  def link_berkeley
    File.rename(@ptb, "#{@ptb}.bak")
    File.symlink(
      File.expand_path('test/functional/berkeley_interface.rb.stub'),
      File.expand_path(@ptb)
    )
  end

  # After testing we bring the right interface back, the program remains intact.
  # NOTE: the call in #teardown is currently commented out.
  def unlink_berkeley
    File.delete(@ptb)
    File.rename("#{@ptb}.bak", @ptb)
  end
end
|
@@ -0,0 +1,40 @@
|
|
1
|
+
# -*- encoding: utf-8 -*-
|
2
|
+
|
3
|
+
require 'test/unit'
|
4
|
+
require 'functional/functional_test_helper'
|
5
|
+
|
6
|
+
# Functional tests that run bin/rosy as an external process.
#
# The commands no longer pass '-rubygems': RubyGems is loaded by default
# since Ruby 1.9, the other functional tests do not pass it either, and
# Rubies that no longer ship ubygems.rb reject the flag.
class TestRosy < Test::Unit::TestCase
  include FunctionalTestHelper

  def setup
    @msg = "Rosy is doing bad, you've just broken something!"
  end

  def test_rosy_testing
    with_exp_files(ROSY_TEST_FILE, PRP_TEST_FILE_ROSY_STD) do
      execute("ruby -I lib bin/rosy -t featurize -e #{ROSY_TEST_FILE} -d test")
      execute("ruby -I lib bin/rosy -t test -e #{ROSY_TEST_FILE}")
    end
  end

  def test_rosy_training
    with_exp_files(ROSY_TRAIN_FILE, PRP_TRAIN_FILE_ROSY_STD) do
      execute("ruby -I lib bin/rosy -t featurize -e #{ROSY_TRAIN_FILE} -d train")
      execute("ruby -I lib bin/rosy -t train -e #{ROSY_TRAIN_FILE} -s argrec")
      execute("ruby -I lib bin/rosy -t train -e #{ROSY_TRAIN_FILE} -s arglab")
    end
  end

  def test_rosy_training_onestep
    with_exp_files(ROSY_TRAIN_FILE, PRP_TRAIN_FILE_ROSY_STD) do
      execute("ruby -I lib bin/rosy -t featurize -e #{ROSY_TRAIN_FILE} -d train")
      execute("ruby -I lib bin/rosy -t train -e #{ROSY_TRAIN_FILE} -s onestep")
    end
  end

  private

  # Generate the given exp files, run the block, then delete the files
  # that were generated. The cleanup sits in +ensure+ so a failing
  # assertion inside the block no longer leaves generated files behind.
  #
  # @param files [Array<String>] exp file paths, created in the given order
  def with_exp_files(*files)
    created = []
    files.each do |file|
      create_exp_file(file)
      created << file
    end
    yield
  ensure
    # Only files whose creation succeeded are removed.
    created.each { |file| remove_exp_file(file) }
  end
end
|
metadata
ADDED
@@ -0,0 +1,85 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: shalmaneser-prep
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 1.2.0.rc4
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Andrei Beliankou
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2015-09-04 00:00:00.000000000 Z
|
12
|
+
dependencies: []
|
13
|
+
description: PREP - Fred and Rosy PREProcessor.
|
14
|
+
email: arbox@yandex.ru
|
15
|
+
executables: []
|
16
|
+
extensions: []
|
17
|
+
extra_rdoc_files:
|
18
|
+
- README.md
|
19
|
+
- LICENSE.md
|
20
|
+
- CHANGELOG.md
|
21
|
+
files:
|
22
|
+
- ".yardopts"
|
23
|
+
- CHANGELOG.md
|
24
|
+
- LICENSE.md
|
25
|
+
- README.md
|
26
|
+
- lib/frprep/Ampersand.rb
|
27
|
+
- lib/frprep/CollinsInterface.rb
|
28
|
+
- lib/frprep/Counter.rb
|
29
|
+
- lib/frprep/FNCorpusXML.rb
|
30
|
+
- lib/frprep/FNDatabase.rb
|
31
|
+
- lib/frprep/FrameXML.rb
|
32
|
+
- lib/frprep/Graph.rb
|
33
|
+
- lib/frprep/MiniparInterface.rb
|
34
|
+
- lib/frprep/RegXML.rb
|
35
|
+
- lib/frprep/STXmlTerminalOrder.rb
|
36
|
+
- lib/frprep/SleepyInterface.rb
|
37
|
+
- lib/frprep/TntInterface.rb
|
38
|
+
- lib/frprep/TreetaggerInterface.rb
|
39
|
+
- lib/frprep/do_parses.rb
|
40
|
+
- lib/frprep/frprep.rb
|
41
|
+
- lib/frprep/interfaces/berkeley_interface.rb
|
42
|
+
- lib/frprep/interfaces/stanford_interface.rb
|
43
|
+
- lib/frprep/interpreters/berkeley_interpreter.rb
|
44
|
+
- lib/frprep/interpreters/stanford_interpreter.rb
|
45
|
+
- lib/frprep/one_parsed_file.rb
|
46
|
+
- lib/frprep/opt_parser.rb
|
47
|
+
- lib/frprep/ruby_class_extensions.rb
|
48
|
+
- test/frprep/test_opt_parser.rb
|
49
|
+
- test/functional/functional_test_helper.rb
|
50
|
+
- test/functional/test_fred.rb
|
51
|
+
- test/functional/test_frprep.rb
|
52
|
+
- test/functional/test_rosy.rb
|
53
|
+
homepage: https://github.com/arbox/shalmaneser
|
54
|
+
licenses:
|
55
|
+
- GPL-2.0
|
56
|
+
metadata: {}
|
57
|
+
post_install_message:
|
58
|
+
rdoc_options:
|
59
|
+
- "-m"
|
60
|
+
- README.md
|
61
|
+
require_paths:
|
62
|
+
- lib
|
63
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
64
|
+
requirements:
|
65
|
+
- - '='
|
66
|
+
- !ruby/object:Gem::Version
|
67
|
+
version: '2.0'
|
68
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
69
|
+
requirements:
|
70
|
+
- - ">"
|
71
|
+
- !ruby/object:Gem::Version
|
72
|
+
version: 1.3.1
|
73
|
+
requirements: []
|
74
|
+
rubyforge_project:
|
75
|
+
rubygems_version: 2.4.5
|
76
|
+
signing_key:
|
77
|
+
specification_version: 4
|
78
|
+
summary: PREP
|
79
|
+
test_files:
|
80
|
+
- test/frprep/test_opt_parser.rb
|
81
|
+
- test/functional/functional_test_helper.rb
|
82
|
+
- test/functional/test_fred.rb
|
83
|
+
- test/functional/test_frprep.rb
|
84
|
+
- test/functional/test_rosy.rb
|
85
|
+
has_rdoc:
|