opener-ner-base 2.0.1 → 3.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: b1588124c7668723220e5ed08bd40fb51e6277a1
4
- data.tar.gz: cecd424beb6731ac335ca61022730cb414f859d1
3
+ metadata.gz: a9c3cbf54f8eb38a77510e677e030d0b0aa0ea10
4
+ data.tar.gz: cd75f167bc72ec29f432191579c5fc149346946e
5
5
  SHA512:
6
- metadata.gz: 322927109650e537a4c6b7cee2346393f149ad206b066038362bbc0b2e16249b8c762fcd7fa1675b96ccbc61fa0c6d9cfabe2f59ad83b06bca83f895664bbe76
7
- data.tar.gz: 12385007e9301e2fbd4c7a4362e8ae6918a33f4b27858c17d8c5cb5e8ffa6e74cec6d9a68e8292f85e05b394339ab4d85ffa873a10dab0cdb47f53488e9e0aec
6
+ metadata.gz: e8760fedd8f3bb7daf95f9597f984fdc4f38b29bf38271b6206dcfae17f360d18e77fff9f483fa34c3b8d517b8b5ebb6e6eed42af7f46f6d8735a0a38ffad2b9
7
+ data.tar.gz: 6c912e843758644975ffe9f793efc084ebf1d73401edfaf3b575dc0d5f9d487360e1f6777ddfe0556ab7ba8f3f002318bccf8de7bff6848a78005ece20938bea
@@ -0,0 +1,13 @@
1
+ Copyright 2014 OpeNER Project Consortium
2
+
3
+ Licensed under the Apache License, Version 2.0 (the "License");
4
+ you may not use this file except in compliance with the License.
5
+ You may obtain a copy of the License at
6
+
7
+ http://www.apache.org/licenses/LICENSE-2.0
8
+
9
+ Unless required by applicable law or agreed to in writing, software
10
+ distributed under the License is distributed on an "AS IS" BASIS,
11
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ See the License for the specific language governing permissions and
13
+ limitations under the License.
@@ -2,7 +2,7 @@
2
2
 
3
3
  require_relative '../lib/opener/ners/base'
4
4
 
5
- kernel = Opener::Ners::Base.new(:args => ARGV, :language => 'de')
5
+ kernel = Opener::Ners::Base.new
6
6
  input = STDIN.tty? ? nil : STDIN.read
7
7
 
8
8
  puts kernel.run(input)
@@ -1,11 +1,8 @@
1
1
  require 'open3'
2
- require 'java'
3
2
  require 'stringio'
3
+ require 'nokogiri'
4
4
 
5
- require File.expand_path("../../../../core/target/ehu-nerc-1.0.jar", __FILE__)
6
-
7
- import 'ixa.kaflib.KAFDocument'
8
- import 'java.io.InputStreamReader'
5
+ require File.expand_path("../../../../core/target/ixa-pipe-nerc-1.1.0.jar", __FILE__)
9
6
 
10
7
  require_relative 'base/version'
11
8
 
@@ -14,23 +11,64 @@ module Opener
14
11
  ##
15
12
  # Base NER class that supports various languages such as Dutch and English.
16
13
  #
17
- # @!attribute [r] args
18
- # @return [Array]
19
14
  # @!attribute [r] options
20
15
  # @return [Hash]
21
16
  #
17
+ # @!attribute [r] features
18
+ # @return [String]
19
+ #
20
+ # @!attribute [r] beamsize
21
+ # @return [Fixnum]
22
+ #
23
+ # @!attribute [r] dictionaries
24
+ # @return [String]
25
+ #
26
+ # @!attribute [r] dictionaries_path
27
+ # @return [String]
28
+ #
29
+ # @!attribute [r] lexer
30
+ # @return [Fixnum]
31
+ #
32
+ # @!attribute [r] model
33
+ # @return [String]
34
+ #
35
+ # @!attribute [r] enable_time
36
+ # @return [TrueClass|FalseClass]
37
+ #
22
38
  class Base
23
- attr_reader :args, :options
39
+ attr_reader :features, :beamsize, :dictionaries, :dictionaries_path,
40
+ :lexer, :model, :enable_time
24
41
 
25
42
  ##
26
43
  # @param [Hash] options
27
44
  #
28
- # @option options [Array] :args The commandline arguments to pass to the
29
- # underlying Java code.
45
+ # @option options [String] :features The NERC feature to use, defaults to
46
+ # "baseline".
47
+ #
48
+ # @option options [Fixnum] :beamsize The beam size for decoding, defaults
49
+ # to 3.
50
+ #
51
+ # @option options [String] :dictionaries The dictionary to use, if any.
52
+ #
53
+ # @option options [String] :dictionaries_path The path to the
54
+ # dictionaries.
55
+ #
56
+ # @option options [Fixnum] :lexer The lexer rules to use for NERC
57
+ # tagging.
58
+ #
59
+ # @option options [String] :model The model to use for NERC annotation.
60
+ #
61
+ # @option options [TrueClass|FalseClass] :enable_time Whether or not to
62
+ # enable dynamic timestamps (enabled by default).
30
63
  #
31
64
  def initialize(options = {})
32
- @args = options.delete(:args) || []
33
- @options = options
65
+ @dictionaries = options[:dictionaries]
66
+ @dictionaries_path = options[:dictionaries_path]
67
+ @features = options.fetch(:features, 'baseline')
68
+ @beamsize = options.fetch(:beamsize, 3)
69
+ @lexer = options[:lexer]
70
+ @model = options.fetch(:model, 'default')
71
+ @enable_time = options.fetch(:enable_time, true)
34
72
  end
35
73
 
36
74
  ##
@@ -41,24 +79,50 @@ module Opener
41
79
  # @return [Array]
42
80
  #
43
81
  def run(input)
44
- input = StringIO.new(input) unless input.kind_of?(IO)
45
- annotator = Java::ehu.nerc.Annotate.new(language)
46
- reader = InputStreamReader.new(input.to_inputstream)
47
- kaf = KAFDocument.create_from_stream(reader)
82
+ lang = language_from_kaf(input)
83
+ kaf = new_kaf_document(input)
84
+ args = [lang, model, features, beamsize]
48
85
 
49
- kaf.add_linguistic_processor("entities","ehu-nerc-"+language,"now","1.0")
50
- annotator.annotateNEsToKAF(kaf)
86
+ if use_dictionaries?
87
+ args += [dictionaries, dictionaries_path, lexer]
88
+ end
89
+
90
+ annotator = Java::es.ehu.si.ixa.pipe.nerc.Annotate.new(*args)
91
+
92
+ annotator.annotate_kaf(enable_time, kaf)
51
93
 
52
94
  return kaf.to_string
53
95
  end
54
96
 
55
97
  ##
56
- # @return [String]
98
+ # @param [String] input The input KAF document as a string.
99
+ # @return [Java::ixa.kaflib.KAFDocument]
100
+ #
101
+ def new_kaf_document(input)
102
+ input_io = StringIO.new(input)
103
+ reader = Java::java.io.InputStreamReader.new(input_io.to_inputstream)
104
+
105
+ return Java::ixa.kaflib.KAFDocument.create_from_stream(reader)
106
+ end
107
+
108
+ ##
109
+ # @return [TrueClass|FalseClass]
57
110
  #
58
- def language
59
- return options[:language]
111
+ def use_dictionaries?
112
+ return dictionaries || dictionaries_path || features == 'dict'
60
113
  end
61
114
 
115
+ ##
116
+ # Returns the language for the given KAF document.
117
+ #
118
+ # @param [String] input
119
+ # @return [String]
120
+ #
121
+ def language_from_kaf(input)
122
+ document = Nokogiri::XML(input)
123
+
124
+ return document.at('KAF').attr('xml:lang')
125
+ end
62
126
  end # Base
63
127
  end # Ners
64
128
  end # Opener
@@ -1,7 +1,7 @@
1
1
  module Opener
2
2
  module Ners
3
3
  class Base
4
- VERSION = '2.0.1'
4
+ VERSION = '3.0.0'
5
5
  end # Base
6
6
  end # Ners
7
7
  end # Opener
@@ -7,16 +7,20 @@ Gem::Specification.new do |gem|
7
7
  gem.summary = 'Base NER component for languages such as English.'
8
8
  gem.description = gem.summary
9
9
  gem.homepage = 'http://opener-project.github.com/'
10
+ gem.license = 'Apache 2.0'
10
11
 
11
12
  gem.files = Dir.glob([
12
- 'core/target/ehu-nerc-*.jar',
13
+ 'core/target/ixa-pipe-nerc-*.jar',
13
14
  'lib/**/*',
14
15
  '*.gemspec',
15
- 'README.md'
16
+ 'README.md',
17
+ 'LICENSE.txt'
16
18
  ]).select { |file| File.file?(file) }
17
19
 
18
20
  gem.executables = Dir.glob('bin/*').map { |file| File.basename(file) }
19
21
 
22
+ gem.add_dependency 'nokogiri'
23
+
20
24
  gem.add_development_dependency 'rspec', '~> 3.0'
21
25
  gem.add_development_dependency 'cucumber'
22
26
  gem.add_development_dependency 'rake'
metadata CHANGED
@@ -1,15 +1,29 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: opener-ner-base
3
3
  version: !ruby/object:Gem::Version
4
- version: 2.0.1
4
+ version: 3.0.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - development@olery.com
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2014-06-24 00:00:00.000000000 Z
11
+ date: 2014-06-30 00:00:00.000000000 Z
12
12
  dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: nokogiri
15
+ version_requirements: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - '>='
18
+ - !ruby/object:Gem::Version
19
+ version: '0'
20
+ requirement: !ruby/object:Gem::Requirement
21
+ requirements:
22
+ - - '>='
23
+ - !ruby/object:Gem::Version
24
+ version: '0'
25
+ prerelease: false
26
+ type: :runtime
13
27
  - !ruby/object:Gem::Dependency
14
28
  name: rspec
15
29
  version_requirements: !ruby/object:Gem::Requirement
@@ -69,28 +83,20 @@ dependencies:
69
83
  description: Base NER component for languages such as English.
70
84
  email:
71
85
  executables:
72
- - ner-nl
73
- - ner-it
74
- - ner-es
75
- - ner-en
76
- - ner-de
77
- - ner-fr
86
+ - ner-base
78
87
  extensions: []
79
88
  extra_rdoc_files: []
80
89
  files:
81
- - core/target/ehu-nerc-1.0.jar
90
+ - core/target/ixa-pipe-nerc-1.1.0.jar
82
91
  - lib/opener/ners/base.rb
83
92
  - lib/opener/ners/base/version.rb
84
93
  - opener-ner-base.gemspec
85
94
  - README.md
86
- - bin/ner-nl
87
- - bin/ner-it
88
- - bin/ner-es
89
- - bin/ner-en
90
- - bin/ner-de
91
- - bin/ner-fr
95
+ - LICENSE.txt
96
+ - bin/ner-base
92
97
  homepage: http://opener-project.github.com/
93
- licenses: []
98
+ licenses:
99
+ - Apache 2.0
94
100
  metadata: {}
95
101
  post_install_message:
96
102
  rdoc_options: []
data/bin/ner-en DELETED
@@ -1,8 +0,0 @@
1
- #!/usr/bin/env ruby
2
-
3
- require_relative '../lib/opener/ners/base'
4
-
5
- kernel = Opener::Ners::Base.new(:args => ARGV, :language => 'en')
6
- input = STDIN.tty? ? nil : STDIN.read
7
-
8
- puts kernel.run(input)
data/bin/ner-es DELETED
@@ -1,8 +0,0 @@
1
- #!/usr/bin/env ruby
2
-
3
- require_relative '../lib/opener/ners/base'
4
-
5
- kernel = Opener::Ners::Base.new(:args => ARGV, :language => 'es')
6
- input = STDIN.tty? ? nil : STDIN.read
7
-
8
- puts kernel.run(input)
data/bin/ner-fr DELETED
@@ -1,8 +0,0 @@
1
- #!/usr/bin/env ruby
2
-
3
- require_relative '../lib/opener/ners/base'
4
-
5
- kernel = Opener::Ners::Base.new(:args => ARGV, :language => 'fr')
6
- input = STDIN.tty? ? nil : STDIN.read
7
-
8
- puts kernel.run(input)
data/bin/ner-it DELETED
@@ -1,8 +0,0 @@
1
- #!/usr/bin/env ruby
2
-
3
- require_relative '../lib/opener/ners/base'
4
-
5
- kernel = Opener::Ners::Base.new(:args => ARGV, :language => 'it')
6
- input = STDIN.tty? ? nil : STDIN.read
7
-
8
- puts kernel.run(input)
data/bin/ner-nl DELETED
@@ -1,8 +0,0 @@
1
- #!/usr/bin/env ruby
2
-
3
- require_relative '../lib/opener/ners/base'
4
-
5
- kernel = Opener::Ners::Base.new(:args => ARGV, :language => 'nl')
6
- input = STDIN.tty? ? nil : STDIN.read
7
-
8
- puts kernel.run(input)