opener-constituent-parser-base 1.0.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 8daa009b3c65632398a5b77dbbe75e6c4d38968c
4
+ data.tar.gz: 4e446f65248f01b84bd82e666542272b2c22fef7
5
+ SHA512:
6
+ metadata.gz: afabc3b04933f70e811278cb4fee2460c19cebf591779db2358310c79920578e28905bc15b633756035a13ebbc2907bbdfdb364cfa75be9c606d190f9807c508
7
+ data.tar.gz: 1a6ae1e2eff88c2e918ef57a0e7a13176f6ee7a8eedd122b1199e179b588d8f00efcfc6d070f4c4a6a179829eb05e0556c9647d9093aaefdbf6c88e733911efc
data/README.md ADDED
@@ -0,0 +1,79 @@
1
+ [![Build Status](https://drone.io/github.com/opener-project/constituent-parser-base/status.png)](https://drone.io/github.com/opener-project/constituent-parser-base/latest)
2
+
3
+ # Constituent Base Parsers
4
+
5
+ This repository contains the source code of the base constituent parser which
6
+ supports the following languages:
7
+
8
+ * English
9
+ * Spanish
10
+ * Italian
11
+
12
+ The parser takes KAF documents (with <wf> elements as it needs tokenized text)
13
+ as standard input and outputs constituent syntactic analysis in treebank
14
+ format, one sentence per line. It also provides an option of outputting the
15
+ constituent heads, as defined in Collins' PhD thesis.
16
+
17
+ ## Requirements
18
+
19
+ * Java 1.7 or newer
20
+ * Ruby 1.9.2 or newer
21
+
22
+ Development requirements:
23
+
24
+ * Maven
25
+ * Bundler
26
+
27
+ ## Installation
28
+
29
+ Installing as a regular Gem:
30
+
31
+ gem install opener-constituent-parser-base
32
+
33
+ Using Bundler:
34
+
35
+ gem 'opener-constituent-parser-base',
36
+ :git => 'git@github.com:opener-project/constituent-parser-base.git',
37
+ :branch => 'master'
38
+
39
+ Using specific install:
40
+
41
+ gem install specific_install
42
+ gem specific_install opener-constituent-parser-base \
43
+ -l https://github.com/opener-project/constituent-parser-base.git
44
+
45
+ ## Usage
46
+
47
+ cat some_input_file.kaf | constituent-parser-en
48
+
49
+ ## Contributing
50
+
51
+ First make sure all the required dependencies are installed:
52
+
53
+ bundle install
54
+
55
+ Then compile the required Java code:
56
+
57
+ bundle exec rake compile
58
+
59
+ For this you'll need to have Java 1.7 and Maven installed. These requirements
60
+ are verified for you before the Rake task calls Maven.
61
+
62
+ ## Testing
63
+
64
+ To run the tests (which are powered by Cucumber), simply run the following:
65
+
66
+ bundle exec rake
67
+
68
+ This will take care of verifying the requirements, installing the required Java
69
+ packages and running the tests.
70
+
71
+ For more information on the available Rake tasks run the following:
72
+
73
+ bundle exec rake -T
74
+
75
+ ## Structure
76
+
77
+ This repository comes in two parts: a collection of Java source files and Ruby
78
+ source files. The Java code can be found in the `core/` directory, everything
79
+ else will be Ruby source code.
@@ -0,0 +1,8 @@
#!/usr/bin/env ruby

# Command-line entry point for the English ('en') constituent parser: reads a
# KAF document from STDIN and writes the processed document to STDOUT.

require_relative '../lib/opener/constituent_parsers/base'

kernel = Opener::ConstituentParsers::Base.new(:args => ARGV, :language => 'en')

# When STDIN is a terminal nothing was piped in, so nil is passed on as input.
input = STDIN.tty? ? nil : STDIN.read

# Base defines #run (there is no #run!) and it returns the resulting document
# as a String instead of printing it, so the result is written out here.
puts kernel.run(input)
@@ -0,0 +1,8 @@
#!/usr/bin/env ruby

# Command-line entry point for the Spanish ('es') constituent parser: reads a
# KAF document from STDIN and writes the processed document to STDOUT.

require_relative '../lib/opener/constituent_parsers/base'

kernel = Opener::ConstituentParsers::Base.new(:args => ARGV, :language => 'es')

# When STDIN is a terminal nothing was piped in, so nil is passed on as input.
input = STDIN.tty? ? nil : STDIN.read

# Base defines #run (there is no #run!) and it returns the resulting document
# as a String instead of printing it, so the result is written out here.
puts kernel.run(input)
@@ -0,0 +1,8 @@
#!/usr/bin/env ruby

# Command-line entry point for the French ('fr') constituent parser: reads a
# KAF document from STDIN and writes the processed document to STDOUT.

require_relative '../lib/opener/constituent_parsers/base'

kernel = Opener::ConstituentParsers::Base.new(:args => ARGV, :language => 'fr')

# When STDIN is a terminal nothing was piped in, so nil is passed on as input.
input = STDIN.tty? ? nil : STDIN.read

# Base defines #run (there is no #run!) and it returns the resulting document
# as a String instead of printing it, so the result is written out here.
puts kernel.run(input)
@@ -0,0 +1,8 @@
#!/usr/bin/env ruby

# Command-line entry point for the Italian ('it') constituent parser: reads a
# KAF document from STDIN and writes the processed document to STDOUT.

require_relative '../lib/opener/constituent_parsers/base'

kernel = Opener::ConstituentParsers::Base.new(:args => ARGV, :language => 'it')

# When STDIN is a terminal nothing was piped in, so nil is passed on as input.
input = STDIN.tty? ? nil : STDIN.read

# Base defines #run (there is no #run!) and it returns the resulting document
# as a String instead of printing it, so the result is written out here.
puts kernel.run(input)
Binary file
@@ -0,0 +1,96 @@
1
+ require 'open3'
2
+ require 'stringio'
3
+
4
+ require 'java'
5
+ require File.expand_path('../../../../core/target/ehu-parse-1.0.jar', __FILE__)
6
+
7
+ import 'ehu.parse.Annotate'
8
+ import 'ixa.kaflib.KAFDocument'
9
+ import 'java.io.InputStreamReader'
10
+ import 'ehu.heads.CollinsHeadFinder'
11
+
12
+ require_relative 'base/version'
13
+
14
+ module Opener
15
+ module ConstituentParsers
16
+ ##
17
+ # The base constituent parser kernel that supports multiple languages such
18
+ # as English and Spanish.
19
+ #
20
+ # @!attribute [r] args
21
+ # @return [Array]
22
+ # @!attribute [r] options
23
+ # @return [Hash]
24
+ #
25
+ class Base
26
+ attr_reader :args, :options
27
+
28
+ ##
29
+ # The default language to use.
30
+ #
31
+ # @return [String]
32
+ #
33
+ DEFAULT_LANGUAGE = 'en'.freeze
34
+
35
+ ##
36
+ # Hash containing the default options to use.
37
+ #
38
+ # @return [Hash]
39
+ #
40
+ DEFAULT_OPTIONS = {
41
+ :args => [],
42
+ :language => DEFAULT_LANGUAGE
43
+ }.freeze
44
+
45
+ ##
46
+ # @param [Hash] options
47
+ #
48
+ # @option options [Array] :args The commandline arguments to pass to the
49
+ # underlying Java code.
50
+ #
51
+ # @see Opener::ConstituentParsers::DEFAULT_OPTIONS
52
+ #
53
+ def initialize(options = {})
54
+ options = DEFAULT_OPTIONS.merge(options)
55
+ @args = options.delete(:args) || []
56
+ @options = options
57
+ end
58
+
59
+ ##
60
+ # Runs the command and returns the output of STDOUT, STDERR and the
61
+ # process information.
62
+ #
63
+ # @param [String] input The input to process.
64
+ # @return [Array]
65
+ #
66
+ def run(input)
67
+ input = StringIO.new(input) unless input.kind_of?(IO)
68
+ annotator = Java::ehu.parse.Annotate.new(language)
69
+ reader = InputStreamReader.new(input.to_inputstream)
70
+ kaf = KAFDocument.create_from_stream(reader)
71
+ kaf.add_linguistic_processor("constituents","ehu-parse-"+language,"now","1.0")
72
+
73
+ if heads?
74
+ head_finder = CollinsHeadFinder.new(language)
75
+ annotator.parseWithHeads(kaf, head_finder)
76
+ else
77
+ annotator.parse(kaf)
78
+ end
79
+
80
+ return kaf.to_string
81
+ end
82
+ #
83
+ ##
84
+ # @return [String]
85
+ #
86
+ def language
87
+ return options[:language]
88
+ end
89
+
90
+ def heads?
91
+ true
92
+ end
93
+
94
+ end # Base
95
+ end # ConstituentParsers
96
+ end # Opener
@@ -0,0 +1,7 @@
module Opener
  module ConstituentParsers
    class Base
      ##
      # The version of the Gem, frozen so the constant's value cannot be
      # modified in place.
      #
      # @return [String]
      #
      VERSION = '1.0.0'.freeze
    end # Base
  end # ConstituentParsers
end # Opener
@@ -0,0 +1,29 @@
# Load the version file by absolute path so that the version constant is
# available for the specification below.
require File.expand_path(
  '../lib/opener/constituent_parsers/base/version',
  __FILE__
)

Gem::Specification.new do |spec|
  spec.name        = 'opener-constituent-parser-base'
  spec.version     = Opener::ConstituentParsers::Base::VERSION
  spec.authors     = ['development@olery.com']
  spec.summary     = 'Constituent parser that supports various languages.'
  spec.description = spec.summary
  spec.has_rdoc    = 'yard'

  spec.required_ruby_version = '>= 1.9.2'

  # Glob patterns for the files to package: the compiled Java archive, the
  # Ruby sources, the gemspec itself and the README.
  packaged_patterns = [
    'core/target/ehu-parse-*.jar',
    'lib/**/*.*',
    '*.gemspec',
    'README.md'
  ]

  spec.files = Dir.glob(packaged_patterns)

  # Every file in bin/ is exposed as an executable, by its base name.
  spec.executables = Dir.glob('bin/*').map { |path| File.basename(path) }

  ['opener-build-tools', 'rspec', 'cucumber', 'rake'].each do |dependency|
    spec.add_development_dependency dependency
  end
end
metadata ADDED
@@ -0,0 +1,112 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: opener-constituent-parser-base
3
+ version: !ruby/object:Gem::Version
4
+ version: 1.0.0
5
+ platform: ruby
6
+ authors:
7
+ - development@olery.com
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2014-05-19 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: opener-build-tools
15
+ version_requirements: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - '>='
18
+ - !ruby/object:Gem::Version
19
+ version: '0'
20
+ requirement: !ruby/object:Gem::Requirement
21
+ requirements:
22
+ - - '>='
23
+ - !ruby/object:Gem::Version
24
+ version: '0'
25
+ prerelease: false
26
+ type: :development
27
+ - !ruby/object:Gem::Dependency
28
+ name: rspec
29
+ version_requirements: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - '>='
32
+ - !ruby/object:Gem::Version
33
+ version: '0'
34
+ requirement: !ruby/object:Gem::Requirement
35
+ requirements:
36
+ - - '>='
37
+ - !ruby/object:Gem::Version
38
+ version: '0'
39
+ prerelease: false
40
+ type: :development
41
+ - !ruby/object:Gem::Dependency
42
+ name: cucumber
43
+ version_requirements: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - '>='
46
+ - !ruby/object:Gem::Version
47
+ version: '0'
48
+ requirement: !ruby/object:Gem::Requirement
49
+ requirements:
50
+ - - '>='
51
+ - !ruby/object:Gem::Version
52
+ version: '0'
53
+ prerelease: false
54
+ type: :development
55
+ - !ruby/object:Gem::Dependency
56
+ name: rake
57
+ version_requirements: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - '>='
60
+ - !ruby/object:Gem::Version
61
+ version: '0'
62
+ requirement: !ruby/object:Gem::Requirement
63
+ requirements:
64
+ - - '>='
65
+ - !ruby/object:Gem::Version
66
+ version: '0'
67
+ prerelease: false
68
+ type: :development
69
+ description: Constituent parser that supports various languages.
70
+ email:
71
+ executables:
72
+ - constituent-parser-fr
73
+ - constituent-parser-en
74
+ - constituent-parser-es
75
+ - constituent-parser-it
76
+ extensions: []
77
+ extra_rdoc_files: []
78
+ files:
79
+ - core/target/ehu-parse-1.0.jar
80
+ - lib/opener/constituent_parsers/base.rb
81
+ - lib/opener/constituent_parsers/base/version.rb
82
+ - opener-constituent-parser-base.gemspec
83
+ - README.md
84
+ - bin/constituent-parser-fr
85
+ - bin/constituent-parser-en
86
+ - bin/constituent-parser-es
87
+ - bin/constituent-parser-it
88
+ homepage:
89
+ licenses: []
90
+ metadata: {}
91
+ post_install_message:
92
+ rdoc_options: []
93
+ require_paths:
94
+ - lib
95
+ required_ruby_version: !ruby/object:Gem::Requirement
96
+ requirements:
97
+ - - '>='
98
+ - !ruby/object:Gem::Version
99
+ version: 1.9.2
100
+ required_rubygems_version: !ruby/object:Gem::Requirement
101
+ requirements:
102
+ - - '>='
103
+ - !ruby/object:Gem::Version
104
+ version: '0'
105
+ requirements: []
106
+ rubyforge_project:
107
+ rubygems_version: 2.1.9
108
+ signing_key:
109
+ specification_version: 4
110
+ summary: Constituent parser that supports various languages.
111
+ test_files: []
112
+ has_rdoc: yard