opener-ner-base 2.0.1 → 3.0.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: b1588124c7668723220e5ed08bd40fb51e6277a1
4
- data.tar.gz: cecd424beb6731ac335ca61022730cb414f859d1
3
+ metadata.gz: a9c3cbf54f8eb38a77510e677e030d0b0aa0ea10
4
+ data.tar.gz: cd75f167bc72ec29f432191579c5fc149346946e
5
5
  SHA512:
6
- metadata.gz: 322927109650e537a4c6b7cee2346393f149ad206b066038362bbc0b2e16249b8c762fcd7fa1675b96ccbc61fa0c6d9cfabe2f59ad83b06bca83f895664bbe76
7
- data.tar.gz: 12385007e9301e2fbd4c7a4362e8ae6918a33f4b27858c17d8c5cb5e8ffa6e74cec6d9a68e8292f85e05b394339ab4d85ffa873a10dab0cdb47f53488e9e0aec
6
+ metadata.gz: e8760fedd8f3bb7daf95f9597f984fdc4f38b29bf38271b6206dcfae17f360d18e77fff9f483fa34c3b8d517b8b5ebb6e6eed42af7f46f6d8735a0a38ffad2b9
7
+ data.tar.gz: 6c912e843758644975ffe9f793efc084ebf1d73401edfaf3b575dc0d5f9d487360e1f6777ddfe0556ab7ba8f3f002318bccf8de7bff6848a78005ece20938bea
@@ -0,0 +1,13 @@
1
+ Copyright 2014 OpeNER Project Consortium
2
+
3
+ Licensed under the Apache License, Version 2.0 (the "License");
4
+ you may not use this file except in compliance with the License.
5
+ You may obtain a copy of the License at
6
+
7
+ http://www.apache.org/licenses/LICENSE-2.0
8
+
9
+ Unless required by applicable law or agreed to in writing, software
10
+ distributed under the License is distributed on an "AS IS" BASIS,
11
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ See the License for the specific language governing permissions and
13
+ limitations under the License.
@@ -2,7 +2,7 @@
2
2
 
3
3
  require_relative '../lib/opener/ners/base'
4
4
 
5
- kernel = Opener::Ners::Base.new(:args => ARGV, :language => 'de')
5
+ kernel = Opener::Ners::Base.new
6
6
  input = STDIN.tty? ? nil : STDIN.read
7
7
 
8
8
  puts kernel.run(input)
@@ -1,11 +1,8 @@
1
1
  require 'open3'
2
- require 'java'
3
2
  require 'stringio'
3
+ require 'nokogiri'
4
4
 
5
- require File.expand_path("../../../../core/target/ehu-nerc-1.0.jar", __FILE__)
6
-
7
- import 'ixa.kaflib.KAFDocument'
8
- import 'java.io.InputStreamReader'
5
+ require File.expand_path("../../../../core/target/ixa-pipe-nerc-1.1.0.jar", __FILE__)
9
6
 
10
7
  require_relative 'base/version'
11
8
 
@@ -14,23 +11,64 @@ module Opener
14
11
  ##
15
12
  # Base NER class that supports various languages such as Dutch and English.
16
13
  #
17
- # @!attribute [r] args
18
- # @return [Array]
19
14
  # @!attribute [r] options
20
15
  # @return [Hash]
21
16
  #
17
+ # @!attribute [r] features
18
+ # @return [String]
19
+ #
20
+ # @!attribute [r] beamsize
21
+ # @return [Fixnum]
22
+ #
23
+ # @!attribute [r] dictionaries
24
+ # @return [String]
25
+ #
26
+ # @!attribute [r] dictionaries_path
27
+ # @return [String]
28
+ #
29
+ # @!attribute [r] lexer
30
+ # @return [Fixnum]
31
+ #
32
+ # @!attribute [r] model
33
+ # @return [String]
34
+ #
35
+ # @!attribute [r] enable_time
36
+ # @return [TrueClass|FalseClass]
37
+ #
22
38
  class Base
23
- attr_reader :args, :options
39
+ attr_reader :features, :beamsize, :dictionaries, :dictionaries_path,
40
+ :lexer, :model, :enable_time
24
41
 
25
42
  ##
26
43
  # @param [Hash] options
27
44
  #
28
- # @option options [Array] :args The commandline arguments to pass to the
29
- # underlying Java code.
45
+ # @option options [String] :features The NERC feature to use, defaults to
46
+ # "baseline".
47
+ #
48
+ # @option options [Fixnum] :beamsize The beam size for decoding, defaults
49
+ # to 3.
50
+ #
51
+ # @option options [String] :dictionaries The dictionary to use, if any.
52
+ #
53
+ # @option options [String] :dictionaries_path The path to the
54
+ # dictionaries.
55
+ #
56
+ # @option options [Fixnum] :lexer The lexer rules to use for NERC
57
+ # tagging.
58
+ #
59
+ # @option options [String] :model The model to use for NERC annotation.
60
+ #
61
+ # @option options [TrueClass|FalseClass] :enable_time Whether or not to
62
+ # enable dynamic timestamps (enabled by default).
30
63
  #
31
64
  def initialize(options = {})
32
- @args = options.delete(:args) || []
33
- @options = options
65
+ @dictionaries = options[:dictionaries]
66
+ @dictionaries_path = options[:dictionaries_path]
67
+ @features = options.fetch(:features, 'baseline')
68
+ @beamsize = options.fetch(:beamsize, 3)
69
+ @lexer = options[:lexer]
70
+ @model = options.fetch(:model, 'default')
71
+ @enable_time = options.fetch(:enable_time, true)
34
72
  end
35
73
 
36
74
  ##
@@ -41,24 +79,50 @@ module Opener
41
79
  # @return [Array]
42
80
  #
43
81
  def run(input)
44
- input = StringIO.new(input) unless input.kind_of?(IO)
45
- annotator = Java::ehu.nerc.Annotate.new(language)
46
- reader = InputStreamReader.new(input.to_inputstream)
47
- kaf = KAFDocument.create_from_stream(reader)
82
+ lang = language_from_kaf(input)
83
+ kaf = new_kaf_document(input)
84
+ args = [lang, model, features, beamsize]
48
85
 
49
- kaf.add_linguistic_processor("entities","ehu-nerc-"+language,"now","1.0")
50
- annotator.annotateNEsToKAF(kaf)
86
+ if use_dictionaries?
87
+ args += [dictionaries, dictionaries_path, lexer]
88
+ end
89
+
90
+ annotator = Java::es.ehu.si.ixa.pipe.nerc.Annotate.new(*args)
91
+
92
+ annotator.annotate_kaf(enable_time, kaf)
51
93
 
52
94
  return kaf.to_string
53
95
  end
54
96
 
55
97
  ##
56
- # @return [String]
98
+ # @param [String] input The input KAF document as a string.
99
+ # @return [Java::ixa.kaflib.KAFDocument]
100
+ #
101
+ def new_kaf_document(input)
102
+ input_io = StringIO.new(input)
103
+ reader = Java::java.io.InputStreamReader.new(input_io.to_inputstream)
104
+
105
+ return Java::ixa.kaflib.KAFDocument.create_from_stream(reader)
106
+ end
107
+
108
+ ##
109
+ # @return [TrueClass|FalseClass]
57
110
  #
58
- def language
59
- return options[:language]
111
+ def use_dictionaries?
112
+ return dictionaries || dictionaries_path || features == 'dict'
60
113
  end
61
114
 
115
+ ##
116
+ # Returns the language for the given KAF document.
117
+ #
118
+ # @param [String] input
119
+ # @return [String]
120
+ #
121
+ def language_from_kaf(input)
122
+ document = Nokogiri::XML(input)
123
+
124
+ return document.at('KAF').attr('xml:lang')
125
+ end
62
126
  end # Base
63
127
  end # Ners
64
128
  end # Opener
@@ -1,7 +1,7 @@
1
1
  module Opener
2
2
  module Ners
3
3
  class Base
4
- VERSION = '2.0.1'
4
+ VERSION = '3.0.0'
5
5
  end # Base
6
6
  end # Ners
7
7
  end # Opener
@@ -7,16 +7,20 @@ Gem::Specification.new do |gem|
7
7
  gem.summary = 'Base NER component for languages such as English.'
8
8
  gem.description = gem.summary
9
9
  gem.homepage = 'http://opener-project.github.com/'
10
+ gem.license = 'Apache 2.0'
10
11
 
11
12
  gem.files = Dir.glob([
12
- 'core/target/ehu-nerc-*.jar',
13
+ 'core/target/ixa-pipe-nerc-*.jar',
13
14
  'lib/**/*',
14
15
  '*.gemspec',
15
- 'README.md'
16
+ 'README.md',
17
+ 'LICENSE.txt'
16
18
  ]).select { |file| File.file?(file) }
17
19
 
18
20
  gem.executables = Dir.glob('bin/*').map { |file| File.basename(file) }
19
21
 
22
+ gem.add_dependency 'nokogiri'
23
+
20
24
  gem.add_development_dependency 'rspec', '~> 3.0'
21
25
  gem.add_development_dependency 'cucumber'
22
26
  gem.add_development_dependency 'rake'
metadata CHANGED
@@ -1,15 +1,29 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: opener-ner-base
3
3
  version: !ruby/object:Gem::Version
4
- version: 2.0.1
4
+ version: 3.0.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - development@olery.com
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2014-06-24 00:00:00.000000000 Z
11
+ date: 2014-06-30 00:00:00.000000000 Z
12
12
  dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: nokogiri
15
+ version_requirements: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - '>='
18
+ - !ruby/object:Gem::Version
19
+ version: '0'
20
+ requirement: !ruby/object:Gem::Requirement
21
+ requirements:
22
+ - - '>='
23
+ - !ruby/object:Gem::Version
24
+ version: '0'
25
+ prerelease: false
26
+ type: :runtime
13
27
  - !ruby/object:Gem::Dependency
14
28
  name: rspec
15
29
  version_requirements: !ruby/object:Gem::Requirement
@@ -69,28 +83,20 @@ dependencies:
69
83
  description: Base NER component for languages such as English.
70
84
  email:
71
85
  executables:
72
- - ner-nl
73
- - ner-it
74
- - ner-es
75
- - ner-en
76
- - ner-de
77
- - ner-fr
86
+ - ner-base
78
87
  extensions: []
79
88
  extra_rdoc_files: []
80
89
  files:
81
- - core/target/ehu-nerc-1.0.jar
90
+ - core/target/ixa-pipe-nerc-1.1.0.jar
82
91
  - lib/opener/ners/base.rb
83
92
  - lib/opener/ners/base/version.rb
84
93
  - opener-ner-base.gemspec
85
94
  - README.md
86
- - bin/ner-nl
87
- - bin/ner-it
88
- - bin/ner-es
89
- - bin/ner-en
90
- - bin/ner-de
91
- - bin/ner-fr
95
+ - LICENSE.txt
96
+ - bin/ner-base
92
97
  homepage: http://opener-project.github.com/
93
- licenses: []
98
+ licenses:
99
+ - Apache 2.0
94
100
  metadata: {}
95
101
  post_install_message:
96
102
  rdoc_options: []
data/bin/ner-en DELETED
@@ -1,8 +0,0 @@
1
- #!/usr/bin/env ruby
2
-
3
- require_relative '../lib/opener/ners/base'
4
-
5
- kernel = Opener::Ners::Base.new(:args => ARGV, :language => 'en')
6
- input = STDIN.tty? ? nil : STDIN.read
7
-
8
- puts kernel.run(input)
data/bin/ner-es DELETED
@@ -1,8 +0,0 @@
1
- #!/usr/bin/env ruby
2
-
3
- require_relative '../lib/opener/ners/base'
4
-
5
- kernel = Opener::Ners::Base.new(:args => ARGV, :language => 'es')
6
- input = STDIN.tty? ? nil : STDIN.read
7
-
8
- puts kernel.run(input)
data/bin/ner-fr DELETED
@@ -1,8 +0,0 @@
1
- #!/usr/bin/env ruby
2
-
3
- require_relative '../lib/opener/ners/base'
4
-
5
- kernel = Opener::Ners::Base.new(:args => ARGV, :language => 'fr')
6
- input = STDIN.tty? ? nil : STDIN.read
7
-
8
- puts kernel.run(input)
data/bin/ner-it DELETED
@@ -1,8 +0,0 @@
1
- #!/usr/bin/env ruby
2
-
3
- require_relative '../lib/opener/ners/base'
4
-
5
- kernel = Opener::Ners::Base.new(:args => ARGV, :language => 'it')
6
- input = STDIN.tty? ? nil : STDIN.read
7
-
8
- puts kernel.run(input)
data/bin/ner-nl DELETED
@@ -1,8 +0,0 @@
1
- #!/usr/bin/env ruby
2
-
3
- require_relative '../lib/opener/ners/base'
4
-
5
- kernel = Opener::Ners::Base.new(:args => ARGV, :language => 'nl')
6
- input = STDIN.tty? ? nil : STDIN.read
7
-
8
- puts kernel.run(input)