opener-constituent-parser-base 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 8daa009b3c65632398a5b77dbbe75e6c4d38968c
4
+ data.tar.gz: 4e446f65248f01b84bd82e666542272b2c22fef7
5
+ SHA512:
6
+ metadata.gz: afabc3b04933f70e811278cb4fee2460c19cebf591779db2358310c79920578e28905bc15b633756035a13ebbc2907bbdfdb364cfa75be9c606d190f9807c508
7
+ data.tar.gz: 1a6ae1e2eff88c2e918ef57a0e7a13176f6ee7a8eedd122b1199e179b588d8f00efcfc6d070f4c4a6a179829eb05e0556c9647d9093aaefdbf6c88e733911efc
data/README.md ADDED
@@ -0,0 +1,79 @@
1
+ [![Build Status](https://drone.io/github.com/opener-project/constituent-parser-base/status.png)](https://drone.io/github.com/opener-project/constituent-parser-base/latest)
2
+
3
+ # Constituent Base Parsers
4
+
5
+ This repository contains the source code of the base constituent parser which
6
+ supports the following languages:
7
+
8
+ * English
9
+ * Spanish
10
+ * Italian
11
+
12
+ The parser takes KAF documents (with <wf> elements as it needs tokenized text)
13
+ as standard input and outputs constituent syntactic analysis in treebank
14
+ format, one sentence per line. It also provides an option of outputting the
15
+ constituent heads, as defined in Collins' PhD thesis.
16
+
17
+ ## Requirements
18
+
19
+ * Java 1.7 or newer
20
+ * Ruby 1.9.2 or newer
21
+
22
+ Development requirements:
23
+
24
+ * Maven
25
+ * Bundler
26
+
27
+ ## Installation
28
+
29
+ Installing as a regular Gem:
30
+
31
+ gem install opener-constituent-parser-base
32
+
33
+ Using Bundler:
34
+
35
+ gem 'opener-constituent-parser-base',
36
+ :git => 'git@github.com:opener-project/constituent-parser-base.git',
37
+ :branch => 'master'
38
+
39
+ Using specific install:
40
+
41
+ gem install specific_install
42
+ gem specific_install opener-constituent-parser-base \
43
+ -l https://github.com/opener-project/constituent-parser-base.git
44
+
45
+ ## Usage
46
+
47
+ cat some_input_file.kaf | constituent-parser-en
48
+
49
+ ## Contributing
50
+
51
+ First make sure all the required dependencies are installed:
52
+
53
+ bundle install
54
+
55
+ Then compile the required Java code:
56
+
57
+ bundle exec rake compile
58
+
59
+ For this you'll need to have Java 1.7 and Maven installed. These requirements
60
+ are verified for you before the Rake task calls Maven.
61
+
62
+ ## Testing
63
+
64
+ To run the tests (which are powered by Cucumber), simply run the following:
65
+
66
+ bundle exec rake
67
+
68
+ This will take care of verifying the requirements, installing the required Java
69
+ packages and running the tests.
70
+
71
+ For more information on the available Rake tasks run the following:
72
+
73
+ bundle exec rake -T
74
+
75
+ ## Structure
76
+
77
+ This repository comes in two parts: a collection of Java source files and Ruby
78
+ source files. The Java code can be found in the `core/` directory; everything
79
+ else is Ruby source code.
@@ -0,0 +1,8 @@
1
#!/usr/bin/env ruby

# Command-line entry point for the English ('en') constituent parser: reads a
# KAF document from STDIN and prints the annotated document to STDOUT.

require_relative '../lib/opener/constituent_parsers/base'

# Without piped input there is nothing to parse, and Base#run cannot wrap nil
# in a StringIO, so stop early with a readable message instead of a stack trace.
abort 'No input given: pipe a KAF document into this command.' if STDIN.tty?

kernel = Opener::ConstituentParsers::Base.new(:args => ARGV, :language => 'en')

# Base defines #run (there is no #run!). It returns the annotated document
# rather than printing it, so the result is written out explicitly here.
puts kernel.run(STDIN.read)
@@ -0,0 +1,8 @@
1
#!/usr/bin/env ruby

# Command-line entry point for the Spanish ('es') constituent parser: reads a
# KAF document from STDIN and prints the annotated document to STDOUT.

require_relative '../lib/opener/constituent_parsers/base'

# Without piped input there is nothing to parse, and Base#run cannot wrap nil
# in a StringIO, so stop early with a readable message instead of a stack trace.
abort 'No input given: pipe a KAF document into this command.' if STDIN.tty?

kernel = Opener::ConstituentParsers::Base.new(:args => ARGV, :language => 'es')

# Base defines #run (there is no #run!). It returns the annotated document
# rather than printing it, so the result is written out explicitly here.
puts kernel.run(STDIN.read)
@@ -0,0 +1,8 @@
1
#!/usr/bin/env ruby

# Command-line entry point for the French ('fr') constituent parser: reads a
# KAF document from STDIN and prints the annotated document to STDOUT.

require_relative '../lib/opener/constituent_parsers/base'

# Without piped input there is nothing to parse, and Base#run cannot wrap nil
# in a StringIO, so stop early with a readable message instead of a stack trace.
abort 'No input given: pipe a KAF document into this command.' if STDIN.tty?

kernel = Opener::ConstituentParsers::Base.new(:args => ARGV, :language => 'fr')

# Base defines #run (there is no #run!). It returns the annotated document
# rather than printing it, so the result is written out explicitly here.
puts kernel.run(STDIN.read)
@@ -0,0 +1,8 @@
1
#!/usr/bin/env ruby

# Command-line entry point for the Italian ('it') constituent parser: reads a
# KAF document from STDIN and prints the annotated document to STDOUT.

require_relative '../lib/opener/constituent_parsers/base'

# Without piped input there is nothing to parse, and Base#run cannot wrap nil
# in a StringIO, so stop early with a readable message instead of a stack trace.
abort 'No input given: pipe a KAF document into this command.' if STDIN.tty?

kernel = Opener::ConstituentParsers::Base.new(:args => ARGV, :language => 'it')

# Base defines #run (there is no #run!). It returns the annotated document
# rather than printing it, so the result is written out explicitly here.
puts kernel.run(STDIN.read)
Binary file
@@ -0,0 +1,96 @@
1
+ require 'open3'
2
+ require 'stringio'
3
+
4
+ require 'java'
5
+ require File.expand_path('../../../../core/target/ehu-parse-1.0.jar', __FILE__)
6
+
7
+ import 'ehu.parse.Annotate'
8
+ import 'ixa.kaflib.KAFDocument'
9
+ import 'java.io.InputStreamReader'
10
+ import 'ehu.heads.CollinsHeadFinder'
11
+
12
+ require_relative 'base/version'
13
+
14
module Opener
  module ConstituentParsers
    ##
    # Constituent parser kernel that drives the Java classes imported at the
    # top of this file. An instance parses documents for a single language,
    # selected with the `:language` option.
    #
    # @!attribute [r] args
    #  @return [Array] The commandline arguments given to the constructor.
    # @!attribute [r] options
    #  @return [Hash] The remaining options (everything except `:args`).
    #
    class Base
      attr_reader :args, :options

      ##
      # The default language to use.
      #
      # @return [String]
      #
      DEFAULT_LANGUAGE = 'en'.freeze

      ##
      # Hash containing the default options to use.
      #
      # @return [Hash]
      #
      DEFAULT_OPTIONS = {
        :args     => [],
        :language => DEFAULT_LANGUAGE
      }.freeze

      ##
      # @param [Hash] options
      #
      # @option options [Array] :args The commandline arguments. They are
      #  copied and exposed through {#args}; {#run} does not use them.
      # @option options [String] :language The language code handed to the
      #  Java annotator and head finder.
      #
      # @see Opener::ConstituentParsers::Base::DEFAULT_OPTIONS
      #
      def initialize(options = {})
        options = DEFAULT_OPTIONS.merge(options)

        # Copy the list: the array stored in DEFAULT_OPTIONS is shared, so
        # handing it out directly would let one instance's `args << ...`
        # change the defaults seen by every later instance.
        @args    = (options.delete(:args) || []).dup
        @options = options
      end

      ##
      # Parses a KAF document and returns the annotated document.
      #
      # A "constituents" linguistic processor entry is added to the document
      # before it is parsed. Heads are annotated whenever {#heads?} is true.
      #
      # @param [String|IO|StringIO] input The KAF document to process. Objects
      #  that respond to `to_inputstream` are read directly, anything else is
      #  wrapped in a StringIO first.
      # @return [String] The value of `kaf.to_string` after parsing.
      #
      def run(input)
        input     = StringIO.new(input) unless input.respond_to?(:to_inputstream)
        annotator = Java::ehu.parse.Annotate.new(language)
        reader    = InputStreamReader.new(input.to_inputstream)
        kaf       = KAFDocument.create_from_stream(reader)

        kaf.add_linguistic_processor(
          'constituents',
          "ehu-parse-#{language}",
          'now',
          '1.0'
        )

        if heads?
          annotator.parseWithHeads(kaf, CollinsHeadFinder.new(language))
        else
          annotator.parse(kaf)
        end

        kaf.to_string
      end

      ##
      # @return [String]
      #
      def language
        options[:language]
      end

      ##
      # Whether {#run} should annotate constituent heads. This is hard-coded
      # to true, so {#run} always takes the `parseWithHeads` branch.
      #
      # @return [TrueClass]
      #
      def heads?
        true
      end
    end # Base
  end # ConstituentParsers
end # Opener
@@ -0,0 +1,7 @@
1
module Opener
  module ConstituentParsers
    class Base
      ##
      # Version of the gem; the gemspec reads this as the package version.
      #
      # @return [String]
      #
      VERSION = '1.0.0'
    end
  end
end
@@ -0,0 +1,29 @@
1
# Load the version constant so the package version is defined in one place.
require File.expand_path('../lib/opener/constituent_parsers/base/version', __FILE__)

Gem::Specification.new do |spec|
  spec.name        = 'opener-constituent-parser-base'
  spec.version     = Opener::ConstituentParsers::Base::VERSION
  spec.authors     = ['development@olery.com']
  spec.summary     = 'Constituent parser that supports various languages.'
  spec.description = spec.summary
  spec.has_rdoc    = 'yard'

  spec.required_ruby_version = '>= 1.9.2'

  # Glob patterns for what gets packaged: the parser jar under core/target,
  # everything under lib/, the gemspec itself and the README.
  packaged = [
    'core/target/ehu-parse-*.jar',
    'lib/**/*.*',
    '*.gemspec',
    'README.md'
  ]

  spec.files = Dir.glob(packaged)

  # Every file in bin/ is exposed as an executable under its base name.
  spec.executables = Dir.glob('bin/*').map { |path| File.basename(path) }

  %w[opener-build-tools rspec cucumber rake].each do |name|
    spec.add_development_dependency(name)
  end
end
metadata ADDED
@@ -0,0 +1,112 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: opener-constituent-parser-base
3
+ version: !ruby/object:Gem::Version
4
+ version: 1.0.0
5
+ platform: ruby
6
+ authors:
7
+ - development@olery.com
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2014-05-19 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: opener-build-tools
15
+ version_requirements: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - '>='
18
+ - !ruby/object:Gem::Version
19
+ version: '0'
20
+ requirement: !ruby/object:Gem::Requirement
21
+ requirements:
22
+ - - '>='
23
+ - !ruby/object:Gem::Version
24
+ version: '0'
25
+ prerelease: false
26
+ type: :development
27
+ - !ruby/object:Gem::Dependency
28
+ name: rspec
29
+ version_requirements: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - '>='
32
+ - !ruby/object:Gem::Version
33
+ version: '0'
34
+ requirement: !ruby/object:Gem::Requirement
35
+ requirements:
36
+ - - '>='
37
+ - !ruby/object:Gem::Version
38
+ version: '0'
39
+ prerelease: false
40
+ type: :development
41
+ - !ruby/object:Gem::Dependency
42
+ name: cucumber
43
+ version_requirements: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - '>='
46
+ - !ruby/object:Gem::Version
47
+ version: '0'
48
+ requirement: !ruby/object:Gem::Requirement
49
+ requirements:
50
+ - - '>='
51
+ - !ruby/object:Gem::Version
52
+ version: '0'
53
+ prerelease: false
54
+ type: :development
55
+ - !ruby/object:Gem::Dependency
56
+ name: rake
57
+ version_requirements: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - '>='
60
+ - !ruby/object:Gem::Version
61
+ version: '0'
62
+ requirement: !ruby/object:Gem::Requirement
63
+ requirements:
64
+ - - '>='
65
+ - !ruby/object:Gem::Version
66
+ version: '0'
67
+ prerelease: false
68
+ type: :development
69
+ description: Constituent parser that supports various languages.
70
+ email:
71
+ executables:
72
+ - constituent-parser-fr
73
+ - constituent-parser-en
74
+ - constituent-parser-es
75
+ - constituent-parser-it
76
+ extensions: []
77
+ extra_rdoc_files: []
78
+ files:
79
+ - core/target/ehu-parse-1.0.jar
80
+ - lib/opener/constituent_parsers/base.rb
81
+ - lib/opener/constituent_parsers/base/version.rb
82
+ - opener-constituent-parser-base.gemspec
83
+ - README.md
84
+ - bin/constituent-parser-fr
85
+ - bin/constituent-parser-en
86
+ - bin/constituent-parser-es
87
+ - bin/constituent-parser-it
88
+ homepage:
89
+ licenses: []
90
+ metadata: {}
91
+ post_install_message:
92
+ rdoc_options: []
93
+ require_paths:
94
+ - lib
95
+ required_ruby_version: !ruby/object:Gem::Requirement
96
+ requirements:
97
+ - - '>='
98
+ - !ruby/object:Gem::Version
99
+ version: 1.9.2
100
+ required_rubygems_version: !ruby/object:Gem::Requirement
101
+ requirements:
102
+ - - '>='
103
+ - !ruby/object:Gem::Version
104
+ version: '0'
105
+ requirements: []
106
+ rubyforge_project:
107
+ rubygems_version: 2.1.9
108
+ signing_key:
109
+ specification_version: 4
110
+ summary: Constituent parser that supports various languages.
111
+ test_files: []
112
+ has_rdoc: yard