opener-ner-base 2.0.1 → 3.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/LICENSE.txt +13 -0
- data/bin/{ner-de → ner-base} +1 -1
- data/core/target/{ehu-nerc-1.0.jar → ixa-pipe-nerc-1.1.0.jar} +0 -0
- data/lib/opener/ners/base.rb +85 -21
- data/lib/opener/ners/base/version.rb +1 -1
- data/opener-ner-base.gemspec +6 -2
- metadata +22 -16
- data/bin/ner-en +0 -8
- data/bin/ner-es +0 -8
- data/bin/ner-fr +0 -8
- data/bin/ner-it +0 -8
- data/bin/ner-nl +0 -8
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: a9c3cbf54f8eb38a77510e677e030d0b0aa0ea10
|
4
|
+
data.tar.gz: cd75f167bc72ec29f432191579c5fc149346946e
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: e8760fedd8f3bb7daf95f9597f984fdc4f38b29bf38271b6206dcfae17f360d18e77fff9f483fa34c3b8d517b8b5ebb6e6eed42af7f46f6d8735a0a38ffad2b9
|
7
|
+
data.tar.gz: 6c912e843758644975ffe9f793efc084ebf1d73401edfaf3b575dc0d5f9d487360e1f6777ddfe0556ab7ba8f3f002318bccf8de7bff6848a78005ece20938bea
|
data/LICENSE.txt
ADDED
@@ -0,0 +1,13 @@
|
|
1
|
+
Copyright 2014 OpeNER Project Consortium
|
2
|
+
|
3
|
+
Licensed under the Apache License, Version 2.0 (the "License");
|
4
|
+
you may not use this file except in compliance with the License.
|
5
|
+
You may obtain a copy of the License at
|
6
|
+
|
7
|
+
http://www.apache.org/licenses/LICENSE-2.0
|
8
|
+
|
9
|
+
Unless required by applicable law or agreed to in writing, software
|
10
|
+
distributed under the License is distributed on an "AS IS" BASIS,
|
11
|
+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12
|
+
See the License for the specific language governing permissions and
|
13
|
+
limitations under the License.
|
data/bin/{ner-de → ner-base}
RENAMED
Binary file
|
data/lib/opener/ners/base.rb
CHANGED
@@ -1,11 +1,8 @@
|
|
1
1
|
require 'open3'
|
2
|
-
require 'java'
|
3
2
|
require 'stringio'
|
3
|
+
require 'nokogiri'
|
4
4
|
|
5
|
-
require File.expand_path("../../../../core/target/ehu-nerc-1.0.jar", __FILE__)
|
6
|
-
|
7
|
-
import 'ixa.kaflib.KAFDocument'
|
8
|
-
import 'java.io.InputStreamReader'
|
5
|
+
require File.expand_path("../../../../core/target/ixa-pipe-nerc-1.1.0.jar", __FILE__)
|
9
6
|
|
10
7
|
require_relative 'base/version'
|
11
8
|
|
@@ -14,23 +11,64 @@ module Opener
|
|
14
11
|
##
|
15
12
|
# Base NER class that supports various languages such as Dutch and English.
|
16
13
|
#
|
17
|
-
# @!attribute [r] args
|
18
|
-
# @return [Array]
|
19
14
|
# @!attribute [r] options
|
20
15
|
# @return [Hash]
|
21
16
|
#
|
17
|
+
# @!attribute [r] features
|
18
|
+
# @return [String]
|
19
|
+
#
|
20
|
+
# @!attribute [r] beamsize
|
21
|
+
# @return [Fixnum]
|
22
|
+
#
|
23
|
+
# @!attribute [r] dictionaries
|
24
|
+
# @return [String]
|
25
|
+
#
|
26
|
+
# @!attribute [r] dictionaries_path
|
27
|
+
# @return [String]
|
28
|
+
#
|
29
|
+
# @!attribute [r] lexer
|
30
|
+
# @return [Fixnum]
|
31
|
+
#
|
32
|
+
# @!attribute [r] model
|
33
|
+
# @return [String]
|
34
|
+
#
|
35
|
+
# @!attribute [r] enable_time
|
36
|
+
# @return [TrueClass|FalseClass]
|
37
|
+
#
|
22
38
|
class Base
|
23
|
-
attr_reader :
|
39
|
+
attr_reader :features, :beamsize, :dictionaries, :dictionaries_path,
|
40
|
+
:lexer, :model, :enable_time
|
24
41
|
|
25
42
|
##
|
26
43
|
# @param [Hash] options
|
27
44
|
#
|
28
|
-
# @option options [
|
29
|
-
#
|
45
|
+
# @option options [String] :features The NERC feature to use, defaults to
|
46
|
+
# "baseline".
|
47
|
+
#
|
48
|
+
# @option options [Fixnum] :beamsize The beam size for decoding, defaults
|
49
|
+
# to 3.
|
50
|
+
#
|
51
|
+
# @option options [String] :dictionaries The dictionary to use, if any.
|
52
|
+
#
|
53
|
+
# @option options [String] :dictionaries_path The path to the
|
54
|
+
# dictionaries.
|
55
|
+
#
|
56
|
+
# @option options [Fixnum] :lexer The lexer rules to use for NERC
|
57
|
+
# tagging.
|
58
|
+
#
|
59
|
+
# @option options [String] :model The model to use for NERC annotation.
|
60
|
+
#
|
61
|
+
# @option options [TrueClass|FalseClass] :enable_time Whether or not to
|
62
|
+
# enable dynamic timestamps (enabled by default).
|
30
63
|
#
|
31
64
|
def initialize(options = {})
|
32
|
-
@
|
33
|
-
@
|
65
|
+
@dictionaries = options[:dictionaries]
|
66
|
+
@dictionaries_path = options[:dictionaries_path]
|
67
|
+
@features = options.fetch(:features, 'baseline')
|
68
|
+
@beamsize = options.fetch(:beamsize, 3)
|
69
|
+
@lexer = options[:lexer]
|
70
|
+
@model = options.fetch(:model, 'default')
|
71
|
+
@enable_time = options.fetch(:enable_time, true)
|
34
72
|
end
|
35
73
|
|
36
74
|
##
|
@@ -41,24 +79,50 @@ module Opener
|
|
41
79
|
# @return [Array]
|
42
80
|
#
|
43
81
|
def run(input)
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
kaf = KAFDocument.create_from_stream(reader)
|
82
|
+
lang = language_from_kaf(input)
|
83
|
+
kaf = new_kaf_document(input)
|
84
|
+
args = [lang, model, features, beamsize]
|
48
85
|
|
49
|
-
|
50
|
-
|
86
|
+
if use_dictionaries?
|
87
|
+
args += [dictionaries, dictionaries_path, lexer]
|
88
|
+
end
|
89
|
+
|
90
|
+
annotator = Java::es.ehu.si.ixa.pipe.nerc.Annotate.new(*args)
|
91
|
+
|
92
|
+
annotator.annotate_kaf(enable_time, kaf)
|
51
93
|
|
52
94
|
return kaf.to_string
|
53
95
|
end
|
54
96
|
|
55
97
|
##
|
56
|
-
# @
|
98
|
+
# @param [String] input The input KAF document as a string.
|
99
|
+
# @return [Java::ixa.kaflib.KAFDocument]
|
100
|
+
#
|
101
|
+
def new_kaf_document(input)
|
102
|
+
input_io = StringIO.new(input)
|
103
|
+
reader = Java::java.io.InputStreamReader.new(input_io.to_inputstream)
|
104
|
+
|
105
|
+
return Java::ixa.kaflib.KAFDocument.create_from_stream(reader)
|
106
|
+
end
|
107
|
+
|
108
|
+
##
|
109
|
+
# @return [TrueClass|FalseClass]
|
57
110
|
#
|
58
|
-
def
|
59
|
-
return
|
111
|
+
def use_dictionaries?
|
112
|
+
return dictionaries || dictionaries_path || features == 'dict'
|
60
113
|
end
|
61
114
|
|
115
|
+
##
|
116
|
+
# Returns the language for the given KAF document.
|
117
|
+
#
|
118
|
+
# @param [String] input
|
119
|
+
# @return [String]
|
120
|
+
#
|
121
|
+
def language_from_kaf(input)
|
122
|
+
document = Nokogiri::XML(input)
|
123
|
+
|
124
|
+
return document.at('KAF').attr('xml:lang')
|
125
|
+
end
|
62
126
|
end # Base
|
63
127
|
end # Ners
|
64
128
|
end # Opener
|
data/opener-ner-base.gemspec
CHANGED
@@ -7,16 +7,20 @@ Gem::Specification.new do |gem|
|
|
7
7
|
gem.summary = 'Base NER component for languages such as English.'
|
8
8
|
gem.description = gem.summary
|
9
9
|
gem.homepage = 'http://opener-project.github.com/'
|
10
|
+
gem.license = 'Apache 2.0'
|
10
11
|
|
11
12
|
gem.files = Dir.glob([
|
12
|
-
'core/target/
|
13
|
+
'core/target/ixa-pipe-nerc-*.jar',
|
13
14
|
'lib/**/*',
|
14
15
|
'*.gemspec',
|
15
|
-
'README.md'
|
16
|
+
'README.md',
|
17
|
+
'LICENSE.txt'
|
16
18
|
]).select { |file| File.file?(file) }
|
17
19
|
|
18
20
|
gem.executables = Dir.glob('bin/*').map { |file| File.basename(file) }
|
19
21
|
|
22
|
+
gem.add_dependency 'nokogiri'
|
23
|
+
|
20
24
|
gem.add_development_dependency 'rspec', '~> 3.0'
|
21
25
|
gem.add_development_dependency 'cucumber'
|
22
26
|
gem.add_development_dependency 'rake'
|
metadata
CHANGED
@@ -1,15 +1,29 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: opener-ner-base
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 2.0.1
|
4
|
+
version: 3.0.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- development@olery.com
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2014-06-
|
11
|
+
date: 2014-06-30 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: nokogiri
|
15
|
+
version_requirements: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - '>='
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '0'
|
20
|
+
requirement: !ruby/object:Gem::Requirement
|
21
|
+
requirements:
|
22
|
+
- - '>='
|
23
|
+
- !ruby/object:Gem::Version
|
24
|
+
version: '0'
|
25
|
+
prerelease: false
|
26
|
+
type: :runtime
|
13
27
|
- !ruby/object:Gem::Dependency
|
14
28
|
name: rspec
|
15
29
|
version_requirements: !ruby/object:Gem::Requirement
|
@@ -69,28 +83,20 @@ dependencies:
|
|
69
83
|
description: Base NER component for languages such as English.
|
70
84
|
email:
|
71
85
|
executables:
|
72
|
-
- ner-nl
|
73
|
-
- ner-it
|
74
|
-
- ner-es
|
75
|
-
- ner-en
|
76
|
-
- ner-de
|
77
|
-
- ner-fr
|
86
|
+
- ner-base
|
78
87
|
extensions: []
|
79
88
|
extra_rdoc_files: []
|
80
89
|
files:
|
81
|
-
- core/target/ehu-nerc-1.0.jar
|
90
|
+
- core/target/ixa-pipe-nerc-1.1.0.jar
|
82
91
|
- lib/opener/ners/base.rb
|
83
92
|
- lib/opener/ners/base/version.rb
|
84
93
|
- opener-ner-base.gemspec
|
85
94
|
- README.md
|
86
|
-
- bin/ner-nl
|
87
|
-
- bin/ner-it
|
88
|
-
- bin/ner-es
|
89
|
-
- bin/ner-en
|
90
|
-
- bin/ner-de
|
91
|
-
- bin/ner-fr
|
95
|
+
- LICENSE.txt
|
96
|
+
- bin/ner-base
|
92
97
|
homepage: http://opener-project.github.com/
|
93
|
-
licenses:
|
98
|
+
licenses:
|
99
|
+
- Apache 2.0
|
94
100
|
metadata: {}
|
95
101
|
post_install_message:
|
96
102
|
rdoc_options: []
|
data/bin/ner-en
DELETED
data/bin/ner-es
DELETED
data/bin/ner-fr
DELETED
data/bin/ner-it
DELETED