opener-constituent-parser-base 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/README.md +79 -0
- data/bin/constituent-parser-en +8 -0
- data/bin/constituent-parser-es +8 -0
- data/bin/constituent-parser-fr +8 -0
- data/bin/constituent-parser-it +8 -0
- data/core/target/ehu-parse-1.0.jar +0 -0
- data/lib/opener/constituent_parsers/base.rb +96 -0
- data/lib/opener/constituent_parsers/base/version.rb +7 -0
- data/opener-constituent-parser-base.gemspec +29 -0
- metadata +112 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 8daa009b3c65632398a5b77dbbe75e6c4d38968c
|
4
|
+
data.tar.gz: 4e446f65248f01b84bd82e666542272b2c22fef7
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: afabc3b04933f70e811278cb4fee2460c19cebf591779db2358310c79920578e28905bc15b633756035a13ebbc2907bbdfdb364cfa75be9c606d190f9807c508
|
7
|
+
data.tar.gz: 1a6ae1e2eff88c2e918ef57a0e7a13176f6ee7a8eedd122b1199e179b588d8f00efcfc6d070f4c4a6a179829eb05e0556c9647d9093aaefdbf6c88e733911efc
|
data/README.md
ADDED
@@ -0,0 +1,79 @@
|
|
1
|
+
[Build status](https://drone.io/github.com/opener-project/constituent-parser-base/latest)
|
2
|
+
|
3
|
+
# Constituent Base Parsers
|
4
|
+
|
5
|
+
This repository contains the source code of the base constituent parser which
|
6
|
+
supports the following languages:
|
7
|
+
|
8
|
+
* English
|
9
|
+
* Spanish
|
10
|
+
* Italian
* French
|
11
|
+
|
12
|
+
The parser takes KAF documents (with `<wf>` elements, as it needs tokenized text)
|
13
|
+
as standard input and outputs constituent syntactic analysis in treebank
|
14
|
+
format, one sentence per line. It also provides an option of outputting the
|
15
|
+
constituent heads, as defined in Collins' PhD thesis.
|
16
|
+
|
17
|
+
## Requirements
|
18
|
+
|
19
|
+
* Java 1.7 or newer
|
20
|
+
* Ruby 1.9.2 or newer
|
21
|
+
|
22
|
+
Development requirements:
|
23
|
+
|
24
|
+
* Maven
|
25
|
+
* Bundler
|
26
|
+
|
27
|
+
## Installation
|
28
|
+
|
29
|
+
Installing as a regular Gem:
|
30
|
+
|
31
|
+
gem install opener-constituent-parser-base
|
32
|
+
|
33
|
+
Using Bundler:
|
34
|
+
|
35
|
+
gem 'opener-constituent-parser-base',
|
36
|
+
:git => 'git@github.com:opener-project/constituent-parser-base.git',
|
37
|
+
:branch => 'master'
|
38
|
+
|
39
|
+
Using specific install:
|
40
|
+
|
41
|
+
gem install specific_install
|
42
|
+
gem specific_install opener-constituent-parser-base \
|
43
|
+
-l https://github.com/opener-project/constituent-parser-base.git
|
44
|
+
|
45
|
+
## Usage
|
46
|
+
|
47
|
+
cat some_input_file.kaf | constituent-parser-en
|
48
|
+
|
49
|
+
## Contributing
|
50
|
+
|
51
|
+
First make sure all the required dependencies are installed:
|
52
|
+
|
53
|
+
bundle install
|
54
|
+
|
55
|
+
Then compile the required Java code:
|
56
|
+
|
57
|
+
bundle exec rake compile
|
58
|
+
|
59
|
+
For this you'll need to have Java 1.7 and Maven installed. These requirements
|
60
|
+
are verified for you before the Rake task calls Maven.
|
61
|
+
|
62
|
+
## Testing
|
63
|
+
|
64
|
+
To run the tests (which are powered by Cucumber), simply run the following:
|
65
|
+
|
66
|
+
bundle exec rake
|
67
|
+
|
68
|
+
This will take care of verifying the requirements, installing the required Java
|
69
|
+
packages and running the tests.
|
70
|
+
|
71
|
+
For more information on the available Rake tasks run the following:
|
72
|
+
|
73
|
+
bundle exec rake -T
|
74
|
+
|
75
|
+
## Structure
|
76
|
+
|
77
|
+
This repository comes in two parts: a collection of Java source files and Ruby
|
78
|
+
source files. The Java code can be found in the `core/` directory, everything
|
79
|
+
else will be Ruby source code.
|
Binary file
|
@@ -0,0 +1,96 @@
|
|
1
|
+
require 'open3'
|
2
|
+
require 'stringio'
|
3
|
+
|
4
|
+
require 'java'
|
5
|
+
require File.expand_path('../../../../core/target/ehu-parse-1.0.jar', __FILE__)
|
6
|
+
|
7
|
+
import 'ehu.parse.Annotate'
|
8
|
+
import 'ixa.kaflib.KAFDocument'
|
9
|
+
import 'java.io.InputStreamReader'
|
10
|
+
import 'ehu.heads.CollinsHeadFinder'
|
11
|
+
|
12
|
+
require_relative 'base/version'
|
13
|
+
|
14
|
+
module Opener
|
15
|
+
module ConstituentParsers
|
16
|
+
##
|
17
|
+
# The base constituent parser kernel that supports multiple languages such
|
18
|
+
# as English and Spanish.
|
19
|
+
#
|
20
|
+
# @!attribute [r] args
|
21
|
+
# @return [Array]
|
22
|
+
# @!attribute [r] options
|
23
|
+
# @return [Hash]
|
24
|
+
#
|
25
|
+
class Base
  attr_reader :args, :options

  ##
  # The default language to use.
  #
  # @return [String]
  #
  DEFAULT_LANGUAGE = 'en'.freeze

  ##
  # Hash containing the default options to use. The nested `:args` Array is
  # frozen as well, since `Hash#freeze` is shallow and would otherwise leave
  # it open to mutation through any instance using the defaults.
  #
  # @return [Hash]
  #
  DEFAULT_OPTIONS = {
    :args     => [].freeze,
    :language => DEFAULT_LANGUAGE
  }.freeze

  ##
  # @param [Hash] options
  #
  # @option options [Array] :args The commandline arguments to pass to the
  #  underlying Java code.
  # @option options [String] :language The language code handed to the
  #  Java annotator and head finder.
  #
  # @see Opener::ConstituentParsers::Base::DEFAULT_OPTIONS
  #
  def initialize(options = {})
    options = DEFAULT_OPTIONS.merge(options)

    # Copy the Array so that instances never share (or mutate) the default
    # Array stored in DEFAULT_OPTIONS, nor the Array owned by the caller.
    @args    = (options.delete(:args) || []).dup
    @options = options
  end

  ##
  # Parses the given KAF input and returns the resulting KAF document,
  # including the constituents layer, as a String.
  #
  # @param [String|IO|StringIO] input The KAF input to process. Strings
  #  are wrapped in a StringIO, IO and StringIO objects are used as-is.
  # @return [String]
  #
  def run(input)
    # StringIO is not an IO subclass, so it has to be checked separately to
    # avoid wrapping an existing StringIO (which raises a TypeError).
    unless input.kind_of?(IO) || input.kind_of?(StringIO)
      input = StringIO.new(input)
    end

    annotator = Java::ehu.parse.Annotate.new(language)
    reader    = InputStreamReader.new(input.to_inputstream)
    kaf       = KAFDocument.create_from_stream(reader)

    # NOTE(review): the literal "now" is passed as the third argument,
    # presumably a timestamp placeholder; confirm against the KAFDocument API.
    kaf.add_linguistic_processor(
      "constituents",
      "ehu-parse-#{language}",
      "now",
      "1.0"
    )

    if heads?
      head_finder = CollinsHeadFinder.new(language)

      annotator.parseWithHeads(kaf, head_finder)
    else
      annotator.parse(kaf)
    end

    kaf.to_string
  end

  ##
  # The language configured for this parser instance.
  #
  # @return [String]
  #
  def language
    options[:language]
  end

  ##
  # Whether constituent heads should be included in the output. This is
  # currently hard-coded to `true`, so {#run} always parses with heads.
  #
  # @return [TrueClass]
  #
  def heads?
    true
  end
end # Base
|
95
|
+
end # ConstituentParsers
|
96
|
+
end # Opener
|
@@ -0,0 +1,29 @@
|
|
1
|
+
# Only the version file is loaded here, not the library itself.
version_file = File.expand_path(
  '../lib/opener/constituent_parsers/base/version',
  __FILE__
)

require version_file

Gem::Specification.new do |spec|
  spec.name        = 'opener-constituent-parser-base'
  spec.version     = Opener::ConstituentParsers::Base::VERSION
  spec.authors     = ['development@olery.com']
  spec.summary     = 'Constituent parser that supports various languages.'
  spec.description = spec.summary
  spec.has_rdoc    = 'yard'

  spec.required_ruby_version = '>= 1.9.2'

  # The compiled parser JAR is packaged together with the Ruby sources.
  packaged_patterns = [
    'core/target/ehu-parse-*.jar',
    'lib/**/*.*',
    '*.gemspec',
    'README.md'
  ]

  spec.files = Dir.glob(packaged_patterns)

  # Every file in bin/ is exposed as an executable, by its base name.
  spec.executables = Dir.glob('bin/*').map { |path| File.basename(path) }

  %w[opener-build-tools rspec cucumber rake].each do |dependency|
    spec.add_development_dependency(dependency)
  end
end
|
metadata
ADDED
@@ -0,0 +1,112 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: opener-constituent-parser-base
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 1.0.0
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- development@olery.com
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2014-05-19 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: opener-build-tools
|
15
|
+
version_requirements: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - '>='
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '0'
|
20
|
+
requirement: !ruby/object:Gem::Requirement
|
21
|
+
requirements:
|
22
|
+
- - '>='
|
23
|
+
- !ruby/object:Gem::Version
|
24
|
+
version: '0'
|
25
|
+
prerelease: false
|
26
|
+
type: :development
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: rspec
|
29
|
+
version_requirements: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - '>='
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '0'
|
34
|
+
requirement: !ruby/object:Gem::Requirement
|
35
|
+
requirements:
|
36
|
+
- - '>='
|
37
|
+
- !ruby/object:Gem::Version
|
38
|
+
version: '0'
|
39
|
+
prerelease: false
|
40
|
+
type: :development
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: cucumber
|
43
|
+
version_requirements: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - '>='
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: '0'
|
48
|
+
requirement: !ruby/object:Gem::Requirement
|
49
|
+
requirements:
|
50
|
+
- - '>='
|
51
|
+
- !ruby/object:Gem::Version
|
52
|
+
version: '0'
|
53
|
+
prerelease: false
|
54
|
+
type: :development
|
55
|
+
- !ruby/object:Gem::Dependency
|
56
|
+
name: rake
|
57
|
+
version_requirements: !ruby/object:Gem::Requirement
|
58
|
+
requirements:
|
59
|
+
- - '>='
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: '0'
|
62
|
+
requirement: !ruby/object:Gem::Requirement
|
63
|
+
requirements:
|
64
|
+
- - '>='
|
65
|
+
- !ruby/object:Gem::Version
|
66
|
+
version: '0'
|
67
|
+
prerelease: false
|
68
|
+
type: :development
|
69
|
+
description: Constituent parser that supports various languages.
|
70
|
+
email:
|
71
|
+
executables:
|
72
|
+
- constituent-parser-fr
|
73
|
+
- constituent-parser-en
|
74
|
+
- constituent-parser-es
|
75
|
+
- constituent-parser-it
|
76
|
+
extensions: []
|
77
|
+
extra_rdoc_files: []
|
78
|
+
files:
|
79
|
+
- core/target/ehu-parse-1.0.jar
|
80
|
+
- lib/opener/constituent_parsers/base.rb
|
81
|
+
- lib/opener/constituent_parsers/base/version.rb
|
82
|
+
- opener-constituent-parser-base.gemspec
|
83
|
+
- README.md
|
84
|
+
- bin/constituent-parser-fr
|
85
|
+
- bin/constituent-parser-en
|
86
|
+
- bin/constituent-parser-es
|
87
|
+
- bin/constituent-parser-it
|
88
|
+
homepage:
|
89
|
+
licenses: []
|
90
|
+
metadata: {}
|
91
|
+
post_install_message:
|
92
|
+
rdoc_options: []
|
93
|
+
require_paths:
|
94
|
+
- lib
|
95
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
96
|
+
requirements:
|
97
|
+
- - '>='
|
98
|
+
- !ruby/object:Gem::Version
|
99
|
+
version: 1.9.2
|
100
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
101
|
+
requirements:
|
102
|
+
- - '>='
|
103
|
+
- !ruby/object:Gem::Version
|
104
|
+
version: '0'
|
105
|
+
requirements: []
|
106
|
+
rubyforge_project:
|
107
|
+
rubygems_version: 2.1.9
|
108
|
+
signing_key:
|
109
|
+
specification_version: 4
|
110
|
+
summary: Constituent parser that supports various languages.
|
111
|
+
test_files: []
|
112
|
+
has_rdoc: yard
|