shalmaneser-prep 1.2.0.rc4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.yardopts +10 -0
- data/CHANGELOG.md +4 -0
- data/LICENSE.md +4 -0
- data/README.md +93 -0
- data/lib/frprep/Ampersand.rb +39 -0
- data/lib/frprep/CollinsInterface.rb +1165 -0
- data/lib/frprep/Counter.rb +18 -0
- data/lib/frprep/FNCorpusXML.rb +643 -0
- data/lib/frprep/FNDatabase.rb +144 -0
- data/lib/frprep/FrameXML.rb +513 -0
- data/lib/frprep/Graph.rb +345 -0
- data/lib/frprep/MiniparInterface.rb +1388 -0
- data/lib/frprep/RegXML.rb +269 -0
- data/lib/frprep/STXmlTerminalOrder.rb +194 -0
- data/lib/frprep/SleepyInterface.rb +384 -0
- data/lib/frprep/TntInterface.rb +44 -0
- data/lib/frprep/TreetaggerInterface.rb +327 -0
- data/lib/frprep/do_parses.rb +143 -0
- data/lib/frprep/frprep.rb +693 -0
- data/lib/frprep/interfaces/berkeley_interface.rb +372 -0
- data/lib/frprep/interfaces/stanford_interface.rb +353 -0
- data/lib/frprep/interpreters/berkeley_interpreter.rb +22 -0
- data/lib/frprep/interpreters/stanford_interpreter.rb +22 -0
- data/lib/frprep/one_parsed_file.rb +28 -0
- data/lib/frprep/opt_parser.rb +94 -0
- data/lib/frprep/ruby_class_extensions.rb +310 -0
- data/test/frprep/test_opt_parser.rb +94 -0
- data/test/functional/functional_test_helper.rb +58 -0
- data/test/functional/test_fred.rb +47 -0
- data/test/functional/test_frprep.rb +99 -0
- data/test/functional/test_rosy.rb +40 -0
- metadata +85 -0
@@ -0,0 +1,94 @@
|
|
1
|
+
# -*- coding: utf-8 -*-
|
2
|
+
|
3
|
+
require 'test/unit'
|
4
|
+
require 'stringio' # for helper methods
|
5
|
+
require 'frprep/opt_parser'
|
6
|
+
|
7
|
+
include FrPrep
|
8
|
+
|
9
|
+
# Unit tests for the command line option parser of FrPrep.
class TestOptParser < Test::Unit::TestCase
  # Remember the experiment file and the set of options treated as valid.
  def setup
    @exp_file = 'test/frprep/data/prp_test.salsa'
    @valid_opts = ['--expfile', @exp_file, '--help']
  end

  # The parser is used through the class-level method <parse>.
  def test_public_methods
    assert_respond_to(OptParser, :parse)
  end

  # Parsing a valid command line is expected to yield a FrPrepConfigData object.
  def test_parse_method
    config = OptParser.parse(['-e', @exp_file])
    assert(config.instance_of?(FrPrepConfigData))
  end

  # An empty command line is expected to end in SystemExit
  # with an explanation on stderr.
  def test_empty_input
    _captured_out, captured_err = intercept_output do
      assert_raises(SystemExit) { OptParser.parse([]) }
    end
    assert_match(/You have to provide some options./, captured_err)
  end

  # Correct options have to be accepted without any exception.
  # Detecting invalid options is the job of OptionParser itself, so we only
  # check that the parser exits with a message instead of raising an error.
  def test_accept_correct_options
    # '--help' is covered by test_successful_exit, so it is dropped here.
    @valid_opts.delete('--help')
    assert_nothing_raised { OptParser.parse(@valid_opts) }

    _captured_out, captured_err = intercept_output do
      assert_raises(SystemExit) { OptParser.parse(['--invalid-option']) }
    end
    assert_match(/You have provided an invalid option:/, captured_err)
  end

  # The help switches are expected to end in SystemExit.
  def test_successful_exit
    quietly do
      %w[-h --help].each do |flag|
        assert_raises(SystemExit) { OptParser.parse(flag.split) }
      end
    end
  end
end
|
64
|
+
################################################################################
|
65
|
+
# It is a helper method, many testable units provide some verbose output
# to stderr and/or stdout. It is useful to suppress any kind of verbosity.
#
# Redirects the process-level $stdout and $stderr to the null device while
# the given block runs and restores both streams afterwards, even if the
# block raises. Every descriptor opened here is closed again before returning.
#
# @yield the code whose output should be discarded
# @return [Object] the value of the block
def quietly
  saved_stdout = $stdout.dup
  saved_stderr = $stderr.dup
  begin
    # IO#reopen duplicates the descriptor, so the null-device handle can be
    # closed as soon as both streams point at it.
    File.open(File::NULL, 'w') do |null|
      $stdout.reopen(null)
      $stderr.reopen(null)
    end
    yield
  ensure
    $stdout.reopen(saved_stdout)
    $stderr.reopen(saved_stderr)
    # The duplicates only existed to remember the original targets.
    saved_stdout.close
    saved_stderr.close
  end
end
|
79
|
+
|
80
|
+
# Helper that captures everything written to $stdout and $stderr by the
# given block. Both globals are swapped for StringIO buffers while the block
# runs and are put back afterwards, also when the block raises.
#
# @return [Array<String>] the captured stdout text followed by the stderr text
def intercept_output
  saved_streams = [$stdout, $stderr]
  $stdout, $stderr = StringIO.new, StringIO.new

  yield

  [$stdout.string, $stderr.string]
ensure
  $stdout, $stderr = saved_streams
end
|
94
|
+
|
@@ -0,0 +1,58 @@
|
|
1
|
+
require 'erb'
|
2
|
+
|
3
|
+
|
4
|
+
# Shared paths and helpers for the functional tests.
#
# Setting $DEBUG will produce all external output.
# Otherwise it is suppressed.
module FunctionalTestHelper
  # Directory holding the experiment files and their ERB templates.
  PREF = 'test/functional/sample_experiment_files'

  PRP_TEST_FILE  = "#{PREF}/prp_test.salsa"
  PRP_TEST_FILE_FRED_STD  = "#{PREF}/prp_test.salsa.fred.standalone"
  PRP_TEST_FILE_ROSY_STD  = "#{PREF}/prp_test.salsa.rosy.standalone"
  PRP_TRAIN_FILE = "#{PREF}/prp_train.salsa"
  PRP_TRAIN_FILE_FRED_STD = "#{PREF}/prp_train.salsa.fred.standalone"
  PRP_TRAIN_FILE_ROSY_STD = "#{PREF}/prp_train.salsa.rosy.standalone"

  FRED_TEST_FILE  = "#{PREF}/fred_test.salsa"
  FRED_TRAIN_FILE = "#{PREF}/fred_train.salsa"
  ROSY_TEST_FILE  = "#{PREF}/rosy_test.salsa"
  ROSY_TRAIN_FILE = "#{PREF}/rosy_train.salsa"

  # Testing input for Preprocessor.
  PRP_PLAININPUT  = "#{PREF}/prp_plaininput"
  PRP_STXMLINPUT  = "#{PREF}/prp_stxmlinput"
  PRP_TABINPUT    = "#{PREF}/prp_tabinput"
  PRP_FNXMLINPUT  = "#{PREF}/prp_fnxmlinput"
  PRP_FNCORPUSXMLINPUT  = "#{PREF}/prp_fncorpusxmlinput"

  # Testing output for Preprocessor.
  PRP_STXMLOUTPUT = "#{PREF}/prp_stxmloutput"
  PRP_TABOUTPUT   = "#{PREF}/prp_taboutput"

  # Run an external process for functional testing and check the return code.
  # <system> returns <true> if the external code exposes no errors.
  # <@msg> is defined for every test object.
  #
  # Unless $DEBUG is set, stdout and stderr of the child process are sent to
  # the null device through the redirection options of <system>, so the
  # command string itself is passed on unchanged.
  # @param cmd [String]
  def execute(cmd)
    status = if $DEBUG
               system(cmd)
             else
               system(cmd, out: File::NULL, err: File::NULL)
             end
    assert(status, @msg)
  end

  # Create a temporary exp file only for this test.
  # Shalmaneser needs absolute paths, we provide them in exp files
  # using templating: "<file>.erb" is rendered with ERB and the result
  # is written to <file>.
  # @param file [String] path of the exp file to generate
  def create_exp_file(file)
    template = File.read("#{file}.erb")
    File.write(file, ERB.new(template).result)
  end

  # Delete an exp file generated by <create_exp_file>.
  # @param file [String] path of the exp file to delete
  def remove_exp_file(file)
    File.delete(file)
  end
end
|
@@ -0,0 +1,47 @@
|
|
1
|
+
# -*- encoding: utf-8 -*-
|
2
|
+
|
3
|
+
require 'test/unit'
|
4
|
+
require 'functional/functional_test_helper'
|
5
|
+
|
6
|
+
# Functional tests for Fred. Every test renders the required experiment
# files from their ERB templates, runs bin/fred as an external process and
# removes the generated files again.
class TestFred < Test::Unit::TestCase
  include FunctionalTestHelper

  def setup
    @msg = "Fred is doing bad, you've just broken something!"
    @test_file = FRED_TEST_FILE
    @train_file = FRED_TRAIN_FILE
  end

  def test_fred_testing_featurization
    with_exp_files(@test_file, PRP_TEST_FILE_FRED_STD) do
      execute("ruby -I lib bin/fred -t featurize -e #{@test_file} -d test")
    end
  end

  def test_fred_testing_tests
    with_exp_files(@test_file, PRP_TEST_FILE_FRED_STD) do
      execute("ruby -I lib bin/fred -t test -e #{@test_file}")
    end
  end

  def test_fred_training_featurization
    with_exp_files(@train_file, PRP_TRAIN_FILE_FRED_STD) do
      execute("ruby -I lib bin/fred -t featurize -e #{@train_file} -d train")
    end
  end

  def test_fred_training_train
    with_exp_files(@train_file, PRP_TRAIN_FILE_FRED_STD) do
      execute("ruby -I lib bin/fred -t train -e #{@train_file}")
    end
  end

  private

  # Generate the given exp files, run the block and delete the generated
  # files afterwards. The cleanup also happens when the block fails its
  # assertion, so a failing run leaves no generated files behind.
  # @param files [Array<String>] exp files to generate for the block
  def with_exp_files(*files)
    created = []
    files.each do |file|
      create_exp_file(file)
      created << file
    end
    yield
  ensure
    # Only files that were actually generated are deleted.
    created.each { |file| remove_exp_file(file) }
  end
end
|
@@ -0,0 +1,99 @@
|
|
1
|
+
# -*- encoding: utf-8 -*-
|
2
|
+
|
3
|
+
require 'test/unit'
|
4
|
+
require 'functional/functional_test_helper'
|
5
|
+
#require 'fileutils' # File.delete(), File.rename(), File.symlink()
|
6
|
+
|
7
|
+
# Functional tests for FrPrep. Every test renders one experiment file from
# its ERB template, runs bin/frprep on it as an external process and removes
# the generated file again.
class TestFrprep < Test::Unit::TestCase
  include FunctionalTestHelper

  def setup
    @msg = "FrPrep is doing bad, you've just broken something!"
    @test_file  = PRP_TEST_FILE
    @train_file = PRP_TRAIN_FILE
    @ptb = 'lib/frprep/interfaces/berkeley_interface.rb'
    # Stubbing of the Berkeley interface is currently disabled.
    #link_berkeley
    ENV['SHALM_BERKELEY_MODEL'] = 'sc_dash_labeled_1_smoothing.gr'
  end

  def teardown
    # Counterpart of the disabled stubbing in <setup>.
    #unlink_berkeley
  end

  def test_frprep_testing
    run_frprep(@test_file)
  end

  def test_frprep_training
    run_frprep(@train_file)
  end

  # Testing input in different formats.
  def test_frprep_plaininput
    run_frprep(PRP_PLAININPUT)
  end

  def test_frprep_stxmlinput
    run_frprep(PRP_STXMLINPUT)
  end

  def test_frprep_tabinput
    run_frprep(PRP_TABINPUT)
  end

  def test_frprep_fncorpusxmlinput
    run_frprep(PRP_FNCORPUSXMLINPUT)
  end

  def test_frprep_fnxmlinput
    run_frprep(PRP_FNXMLINPUT)
  end

  # Testing output in different formats.
  # We test only on German input assuming English input to work.
  def test_frprep_stxmloutput
    run_frprep(PRP_STXMLOUTPUT)
  end

  def test_frprep_taboutput
    run_frprep(PRP_TABOUTPUT)
  end

  private

  # Generate the exp file, run bin/frprep on it and delete the generated
  # file afterwards. The file is also deleted when the run fails its
  # assertion, so a failing test leaves no generated file behind.
  # @param exp_file [String] path of the exp file to generate and use
  def run_frprep(exp_file)
    create_exp_file(exp_file)
    begin
      execute("ruby -I lib bin/frprep -e #{exp_file}")
    ensure
      remove_exp_file(exp_file)
    end
  end

  # Berkeley Parser takes a long time which is bad for testing.
  # We ran it once and reuse the result file in our tests.
  # Before every test we link the Berkeley interface to a stub
  # with the BP invocation switched off.
  def link_berkeley
    File.rename(@ptb, "#{@ptb}.bak")
    File.symlink(
      File.expand_path('test/functional/berkeley_interface.rb.stub'),
      File.expand_path(@ptb)
    )
  end

  # After testing we bring the right interface back, the program remains intact.
  def unlink_berkeley
    File.delete(@ptb)
    File.rename("#{@ptb}.bak", @ptb)
  end
end
|
@@ -0,0 +1,40 @@
|
|
1
|
+
# -*- encoding: utf-8 -*-
|
2
|
+
|
3
|
+
require 'test/unit'
|
4
|
+
require 'functional/functional_test_helper'
|
5
|
+
|
6
|
+
# Functional tests for Rosy. Every test renders the required experiment
# files from their ERB templates, runs bin/rosy as an external process and
# removes the generated files again.
#
# The interpreter is started as "ruby -I lib", like in the other functional
# tests. The former "-rubygems" switch is not needed because RubyGems is
# loaded by default, and newer interpreters reject it.
class TestRosy < Test::Unit::TestCase
  include FunctionalTestHelper

  def setup
    @msg = "Rosy is doing bad, you've just broken something!"
  end

  def test_rosy_testing
    with_exp_files(ROSY_TEST_FILE, PRP_TEST_FILE_ROSY_STD) do
      execute("ruby -I lib bin/rosy -t featurize -e #{ROSY_TEST_FILE} -d test")
      execute("ruby -I lib bin/rosy -t test -e #{ROSY_TEST_FILE}")
    end
  end

  def test_rosy_training
    with_exp_files(ROSY_TRAIN_FILE, PRP_TRAIN_FILE_ROSY_STD) do
      execute("ruby -I lib bin/rosy -t featurize -e #{ROSY_TRAIN_FILE} -d train")
      execute("ruby -I lib bin/rosy -t train -e #{ROSY_TRAIN_FILE} -s argrec")
      execute("ruby -I lib bin/rosy -t train -e #{ROSY_TRAIN_FILE} -s arglab")
    end
  end

  def test_rosy_training_onestep
    with_exp_files(ROSY_TRAIN_FILE, PRP_TRAIN_FILE_ROSY_STD) do
      execute("ruby -I lib bin/rosy -t featurize -e #{ROSY_TRAIN_FILE} -d train")
      execute("ruby -I lib bin/rosy -t train -e #{ROSY_TRAIN_FILE} -s onestep")
    end
  end

  private

  # Generate the given exp files, run the block and delete the generated
  # files afterwards. The cleanup also happens when the block fails its
  # assertion, so a failing run leaves no generated files behind.
  # @param files [Array<String>] exp files to generate for the block
  def with_exp_files(*files)
    created = []
    files.each do |file|
      create_exp_file(file)
      created << file
    end
    yield
  ensure
    # Only files that were actually generated are deleted.
    created.each { |file| remove_exp_file(file) }
  end
end
|
metadata
ADDED
@@ -0,0 +1,85 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: shalmaneser-prep
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 1.2.0.rc4
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Andrei Beliankou
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2015-09-04 00:00:00.000000000 Z
|
12
|
+
dependencies: []
|
13
|
+
description: PREP - Fred and Rosy PREProcessor.
|
14
|
+
email: arbox@yandex.ru
|
15
|
+
executables: []
|
16
|
+
extensions: []
|
17
|
+
extra_rdoc_files:
|
18
|
+
- README.md
|
19
|
+
- LICENSE.md
|
20
|
+
- CHANGELOG.md
|
21
|
+
files:
|
22
|
+
- ".yardopts"
|
23
|
+
- CHANGELOG.md
|
24
|
+
- LICENSE.md
|
25
|
+
- README.md
|
26
|
+
- lib/frprep/Ampersand.rb
|
27
|
+
- lib/frprep/CollinsInterface.rb
|
28
|
+
- lib/frprep/Counter.rb
|
29
|
+
- lib/frprep/FNCorpusXML.rb
|
30
|
+
- lib/frprep/FNDatabase.rb
|
31
|
+
- lib/frprep/FrameXML.rb
|
32
|
+
- lib/frprep/Graph.rb
|
33
|
+
- lib/frprep/MiniparInterface.rb
|
34
|
+
- lib/frprep/RegXML.rb
|
35
|
+
- lib/frprep/STXmlTerminalOrder.rb
|
36
|
+
- lib/frprep/SleepyInterface.rb
|
37
|
+
- lib/frprep/TntInterface.rb
|
38
|
+
- lib/frprep/TreetaggerInterface.rb
|
39
|
+
- lib/frprep/do_parses.rb
|
40
|
+
- lib/frprep/frprep.rb
|
41
|
+
- lib/frprep/interfaces/berkeley_interface.rb
|
42
|
+
- lib/frprep/interfaces/stanford_interface.rb
|
43
|
+
- lib/frprep/interpreters/berkeley_interpreter.rb
|
44
|
+
- lib/frprep/interpreters/stanford_interpreter.rb
|
45
|
+
- lib/frprep/one_parsed_file.rb
|
46
|
+
- lib/frprep/opt_parser.rb
|
47
|
+
- lib/frprep/ruby_class_extensions.rb
|
48
|
+
- test/frprep/test_opt_parser.rb
|
49
|
+
- test/functional/functional_test_helper.rb
|
50
|
+
- test/functional/test_fred.rb
|
51
|
+
- test/functional/test_frprep.rb
|
52
|
+
- test/functional/test_rosy.rb
|
53
|
+
homepage: https://github.com/arbox/shalmaneser
|
54
|
+
licenses:
|
55
|
+
- GPL-2.0
|
56
|
+
metadata: {}
|
57
|
+
post_install_message:
|
58
|
+
rdoc_options:
|
59
|
+
- "-m"
|
60
|
+
- README.md
|
61
|
+
require_paths:
|
62
|
+
- lib
|
63
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
64
|
+
requirements:
|
65
|
+
- - '='
|
66
|
+
- !ruby/object:Gem::Version
|
67
|
+
version: '2.0'
|
68
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
69
|
+
requirements:
|
70
|
+
- - ">"
|
71
|
+
- !ruby/object:Gem::Version
|
72
|
+
version: 1.3.1
|
73
|
+
requirements: []
|
74
|
+
rubyforge_project:
|
75
|
+
rubygems_version: 2.4.5
|
76
|
+
signing_key:
|
77
|
+
specification_version: 4
|
78
|
+
summary: PREP
|
79
|
+
test_files:
|
80
|
+
- test/frprep/test_opt_parser.rb
|
81
|
+
- test/functional/functional_test_helper.rb
|
82
|
+
- test/functional/test_fred.rb
|
83
|
+
- test/functional/test_frprep.rb
|
84
|
+
- test/functional/test_rosy.rb
|
85
|
+
has_rdoc:
|