opener-ner-base 3.0.1 → 3.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +8 -3
- data/core/target/ixa-pipe-nerc-1.5.2.jar +0 -0
- data/lib/opener/ners/base.rb +54 -72
- data/lib/opener/ners/base/version.rb +1 -1
- data/models/de.bin +0 -0
- data/models/en.bin +0 -0
- data/models/es.bin +0 -0
- data/models/fr.bin +0 -0
- data/models/it.bin +0 -0
- data/models/nl.bin +0 -0
- data/opener-ner-base.gemspec +3 -1
- metadata +48 -27
- data/core/target/ixa-pipe-nerc-1.1.0.jar +0 -0
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 184b5a02199988f3fa5e798e9438b679966542e0
|
4
|
+
data.tar.gz: e60c01830e0f3e97b1a7489cb4c5acfdc9d3cd14
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: ca7ed76f8a08c1409ed6de52a2178e766994816ebf92e89821700b0f7cddf0b1e6c0f59e0c69009fbd9877149c84d00dc60efa7ab24be22b403ed4bbb3bbdaec
|
7
|
+
data.tar.gz: 7852dd522d262eb94f9482f4a0a81422ba66bd9b762d52a5bf54b79e3192161cd0039b53a2f70f075ece4a93bfb2d59dfd6c763235be188858dbe95dd9141f47
|
data/README.md
CHANGED
@@ -1,5 +1,3 @@
|
|
1
|
-
[](https://drone.io/github.com/opener-project/ner-base/latest)
|
2
|
-
|
3
1
|
# NER Base
|
4
2
|
|
5
3
|
This repository contains the source code used for performing Named Entity
|
@@ -42,7 +40,14 @@ Using specific install:
|
|
42
40
|
|
43
41
|
## Usage
|
44
42
|
|
45
|
-
|
43
|
+
Basic usage:
|
44
|
+
|
45
|
+
cat some_input_file.kaf | ner-base
|
46
|
+
|
47
|
+
This component ships a built-in set of models. If you have your own models you
|
48
|
+
can set the environment variable `NER_BASE_MODELS_PATH` to the directory
|
49
|
+
containing your models. Each model should be named `LANGUAGE.bin` where
|
50
|
+
`LANGUAGE` is a 2-letter language code (`nl`, `en`, etc.).
|
46
51
|
|
47
52
|
## Contributing
|
48
53
|
|
Binary file
|
data/lib/opener/ners/base.rb
CHANGED
@@ -1,74 +1,36 @@
|
|
1
|
-
require 'open3'
|
2
1
|
require 'stringio'
|
3
|
-
require '
|
4
|
-
|
5
|
-
require File.expand_path("../../../../core/target/ixa-pipe-nerc-1.1.0.jar", __FILE__)
|
2
|
+
require 'oga'
|
3
|
+
require 'opener/core'
|
6
4
|
|
7
5
|
require_relative 'base/version'
|
8
6
|
|
7
|
+
require File.expand_path("../../../../core/target/ixa-pipe-nerc-1.5.2.jar", __FILE__)
|
8
|
+
|
9
9
|
module Opener
|
10
10
|
module Ners
|
11
11
|
##
|
12
12
|
# Base NER class that supports various languages such as Dutch and English.
|
13
13
|
#
|
14
|
-
# @!attribute [r] options
|
15
|
-
# @return [Hash]
|
16
|
-
#
|
17
|
-
# @!attribute [r] features
|
18
|
-
# @return [String]
|
19
|
-
#
|
20
|
-
# @!attribute [r] beamsize
|
21
|
-
# @return [Fixnum]
|
22
|
-
#
|
23
|
-
# @!attribute [r] dictionaries
|
24
|
-
# @return [String]
|
25
|
-
#
|
26
|
-
# @!attribute [r] dictionaries_path
|
27
|
-
# @return [String]
|
28
|
-
#
|
29
|
-
# @!attribute [r] lexer
|
30
|
-
# @return [Fixnum]
|
31
|
-
#
|
32
|
-
# @!attribute [r] model
|
33
|
-
# @return [String]
|
34
|
-
#
|
35
|
-
# @!attribute [r] enable_time
|
36
|
-
# @return [TrueClass|FalseClass]
|
37
|
-
#
|
38
14
|
class Base
|
39
|
-
|
40
|
-
|
15
|
+
# The default models directory.
|
16
|
+
MODELS_PATH = File.expand_path('../../../../models', __FILE__)
|
17
|
+
|
18
|
+
# @return [String]
|
19
|
+
attr_reader :models
|
20
|
+
|
21
|
+
# @return [TrueClass|FalseClass]
|
22
|
+
attr_reader :enable_time
|
41
23
|
|
42
24
|
##
|
43
25
|
# @param [Hash] options
|
44
26
|
#
|
45
|
-
# @option options [String] :features The NERC feature to use, defaults to
|
46
|
-
# "baseline".
|
47
|
-
#
|
48
|
-
# @option options [Fixnum] :beamsize The beam size for decoding, defaults
|
49
|
-
# to 3.
|
50
|
-
#
|
51
|
-
# @option options [String] :dictionaries The dictionary to use, if any.
|
52
|
-
#
|
53
|
-
# @option options [String] :dictionaries_path The path to the
|
54
|
-
# dictionaries.
|
55
|
-
#
|
56
|
-
# @option options [Fixnum] :lexer The lexer rules to use for NERC
|
57
|
-
# tagging.
|
58
|
-
#
|
59
|
-
# @option options [String] :model The model to use for NERC annotation.
|
60
|
-
#
|
61
27
|
# @option options [TrueClass|FalseClass] :enable_time Whether or not to
|
62
28
|
# enable dynamic timestamps (enabled by default).
|
63
29
|
#
|
64
30
|
def initialize(options = {})
|
65
|
-
@
|
66
|
-
|
67
|
-
@
|
68
|
-
@beamsize = options.fetch(:beamsize, 3)
|
69
|
-
@lexer = options[:lexer]
|
70
|
-
@model = options.fetch(:model, 'default')
|
71
|
-
@enable_time = options.fetch(:enable_time, true)
|
31
|
+
@models = ENV['NER_BASE_MODELS_PATH'] || MODELS_PATH
|
32
|
+
|
33
|
+
@enable_time = options.fetch(:enable_time, true)
|
72
34
|
end
|
73
35
|
|
74
36
|
##
|
@@ -79,19 +41,16 @@ module Opener
|
|
79
41
|
# @return [Array]
|
80
42
|
#
|
81
43
|
def run(input)
|
82
|
-
lang
|
83
|
-
|
84
|
-
args = [lang, model, features, beamsize]
|
44
|
+
lang = language_from_kaf(input)
|
45
|
+
model = File.join(models, "#{lang}.bin")
|
85
46
|
|
86
|
-
|
87
|
-
args += [dictionaries, dictionaries_path, lexer]
|
88
|
-
end
|
47
|
+
raise(Core::UnsupportedLanguageError, lang) unless File.file?(model)
|
89
48
|
|
90
|
-
|
49
|
+
kaf = new_kaf_document(input)
|
50
|
+
properties = build_properties(lang, model)
|
51
|
+
annotator = Java::eus.ixa.ixa.pipe.nerc.Annotate.new(properties)
|
91
52
|
|
92
53
|
annotator.annotate_kaf(enable_time, kaf)
|
93
|
-
|
94
|
-
return kaf.to_string
|
95
54
|
end
|
96
55
|
|
97
56
|
##
|
@@ -102,14 +61,7 @@ module Opener
|
|
102
61
|
input_io = StringIO.new(input)
|
103
62
|
reader = Java::java.io.InputStreamReader.new(input_io.to_inputstream)
|
104
63
|
|
105
|
-
|
106
|
-
end
|
107
|
-
|
108
|
-
##
|
109
|
-
# @return [TrueClass|FalseClass]
|
110
|
-
#
|
111
|
-
def use_dictionaries?
|
112
|
-
return dictionaries || dictionaries_path || features == 'dict'
|
64
|
+
Java::ixa.kaflib.KAFDocument.create_from_stream(reader)
|
113
65
|
end
|
114
66
|
|
115
67
|
##
|
@@ -119,9 +71,39 @@ module Opener
|
|
119
71
|
# @return [String]
|
120
72
|
#
|
121
73
|
def language_from_kaf(input)
|
122
|
-
|
74
|
+
parser = Oga::XML::PullParser.new(input)
|
75
|
+
language = nil
|
76
|
+
|
77
|
+
parser.parse do |node|
|
78
|
+
if node.is_a?(Oga::XML::Element) and node.name == 'KAF'
|
79
|
+
language = node.get('xml:lang')
|
80
|
+
break
|
81
|
+
end
|
82
|
+
end
|
83
|
+
|
84
|
+
# Make sure nobody can _somehow_ inject a language such as "../../foo".
|
85
|
+
unless language =~ /\A[a-zA-Z\-_]+\z/
|
86
|
+
raise Core::UnsupportedLanguageError, language
|
87
|
+
end
|
88
|
+
|
89
|
+
language
|
90
|
+
end
|
91
|
+
|
92
|
+
private
|
93
|
+
|
94
|
+
# @param [String] language
|
95
|
+
# @param [String] model
|
96
|
+
def build_properties(language, model)
|
97
|
+
properties = Java::java.util.Properties.new
|
98
|
+
|
99
|
+
properties.set_property('language', language)
|
100
|
+
properties.set_property('model', model)
|
101
|
+
properties.set_property('ruleBasedOption', 'off')
|
102
|
+
properties.set_property('dictTag', 'off')
|
103
|
+
properties.set_property('dictPath', 'off')
|
104
|
+
properties.set_property('clearFeatures', 'no')
|
123
105
|
|
124
|
-
|
106
|
+
properties
|
125
107
|
end
|
126
108
|
end # Base
|
127
109
|
end # Ners
|
data/models/de.bin
ADDED
Binary file
|
data/models/en.bin
ADDED
Binary file
|
data/models/es.bin
ADDED
Binary file
|
data/models/fr.bin
ADDED
Binary file
|
data/models/it.bin
ADDED
Binary file
|
data/models/nl.bin
ADDED
Binary file
|
data/opener-ner-base.gemspec
CHANGED
@@ -12,6 +12,7 @@ Gem::Specification.new do |gem|
|
|
12
12
|
gem.files = Dir.glob([
|
13
13
|
'core/target/ixa-pipe-nerc-*.jar',
|
14
14
|
'lib/**/*',
|
15
|
+
'models/**/*',
|
15
16
|
'*.gemspec',
|
16
17
|
'README.md',
|
17
18
|
'LICENSE.txt'
|
@@ -19,7 +20,8 @@ Gem::Specification.new do |gem|
|
|
19
20
|
|
20
21
|
gem.executables = Dir.glob('bin/*').map { |file| File.basename(file) }
|
21
22
|
|
22
|
-
gem.add_dependency '
|
23
|
+
gem.add_dependency 'oga'
|
24
|
+
gem.add_dependency 'opener-core'
|
23
25
|
|
24
26
|
gem.add_development_dependency 'rspec', '~> 3.0'
|
25
27
|
gem.add_development_dependency 'cucumber'
|
metadata
CHANGED
@@ -1,85 +1,99 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: opener-ner-base
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 3.0
|
4
|
+
version: 3.1.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- development@olery.com
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2015-09-17 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
|
-
|
14
|
+
requirement: !ruby/object:Gem::Requirement
|
15
|
+
requirements:
|
16
|
+
- - ">="
|
17
|
+
- !ruby/object:Gem::Version
|
18
|
+
version: '0'
|
19
|
+
name: oga
|
20
|
+
prerelease: false
|
21
|
+
type: :runtime
|
15
22
|
version_requirements: !ruby/object:Gem::Requirement
|
16
23
|
requirements:
|
17
|
-
- -
|
24
|
+
- - ">="
|
18
25
|
- !ruby/object:Gem::Version
|
19
26
|
version: '0'
|
27
|
+
- !ruby/object:Gem::Dependency
|
20
28
|
requirement: !ruby/object:Gem::Requirement
|
21
29
|
requirements:
|
22
|
-
- -
|
30
|
+
- - ">="
|
23
31
|
- !ruby/object:Gem::Version
|
24
32
|
version: '0'
|
33
|
+
name: opener-core
|
25
34
|
prerelease: false
|
26
35
|
type: :runtime
|
27
|
-
- !ruby/object:Gem::Dependency
|
28
|
-
name: rspec
|
29
36
|
version_requirements: !ruby/object:Gem::Requirement
|
30
37
|
requirements:
|
31
|
-
- -
|
38
|
+
- - ">="
|
32
39
|
- !ruby/object:Gem::Version
|
33
|
-
version: '
|
40
|
+
version: '0'
|
41
|
+
- !ruby/object:Gem::Dependency
|
34
42
|
requirement: !ruby/object:Gem::Requirement
|
35
43
|
requirements:
|
36
|
-
- - ~>
|
44
|
+
- - "~>"
|
37
45
|
- !ruby/object:Gem::Version
|
38
46
|
version: '3.0'
|
47
|
+
name: rspec
|
39
48
|
prerelease: false
|
40
49
|
type: :development
|
41
|
-
- !ruby/object:Gem::Dependency
|
42
|
-
name: cucumber
|
43
50
|
version_requirements: !ruby/object:Gem::Requirement
|
44
51
|
requirements:
|
45
|
-
- -
|
52
|
+
- - "~>"
|
46
53
|
- !ruby/object:Gem::Version
|
47
|
-
version: '0'
|
54
|
+
version: '3.0'
|
55
|
+
- !ruby/object:Gem::Dependency
|
48
56
|
requirement: !ruby/object:Gem::Requirement
|
49
57
|
requirements:
|
50
|
-
- -
|
58
|
+
- - ">="
|
51
59
|
- !ruby/object:Gem::Version
|
52
60
|
version: '0'
|
61
|
+
name: cucumber
|
53
62
|
prerelease: false
|
54
63
|
type: :development
|
55
|
-
- !ruby/object:Gem::Dependency
|
56
|
-
name: rake
|
57
64
|
version_requirements: !ruby/object:Gem::Requirement
|
58
65
|
requirements:
|
59
|
-
- -
|
66
|
+
- - ">="
|
60
67
|
- !ruby/object:Gem::Version
|
61
68
|
version: '0'
|
69
|
+
- !ruby/object:Gem::Dependency
|
62
70
|
requirement: !ruby/object:Gem::Requirement
|
63
71
|
requirements:
|
64
|
-
- -
|
72
|
+
- - ">="
|
65
73
|
- !ruby/object:Gem::Version
|
66
74
|
version: '0'
|
75
|
+
name: rake
|
67
76
|
prerelease: false
|
68
77
|
type: :development
|
69
|
-
- !ruby/object:Gem::Dependency
|
70
|
-
name: cliver
|
71
78
|
version_requirements: !ruby/object:Gem::Requirement
|
72
79
|
requirements:
|
73
|
-
- -
|
80
|
+
- - ">="
|
74
81
|
- !ruby/object:Gem::Version
|
75
82
|
version: '0'
|
83
|
+
- !ruby/object:Gem::Dependency
|
76
84
|
requirement: !ruby/object:Gem::Requirement
|
77
85
|
requirements:
|
78
|
-
- -
|
86
|
+
- - ">="
|
79
87
|
- !ruby/object:Gem::Version
|
80
88
|
version: '0'
|
89
|
+
name: cliver
|
81
90
|
prerelease: false
|
82
91
|
type: :development
|
92
|
+
version_requirements: !ruby/object:Gem::Requirement
|
93
|
+
requirements:
|
94
|
+
- - ">="
|
95
|
+
- !ruby/object:Gem::Version
|
96
|
+
version: '0'
|
83
97
|
description: Base NER component for languages such as English.
|
84
98
|
email:
|
85
99
|
executables:
|
@@ -90,9 +104,15 @@ files:
|
|
90
104
|
- LICENSE.txt
|
91
105
|
- README.md
|
92
106
|
- bin/ner-base
|
93
|
-
- core/target/ixa-pipe-nerc-1.
|
107
|
+
- core/target/ixa-pipe-nerc-1.5.2.jar
|
94
108
|
- lib/opener/ners/base.rb
|
95
109
|
- lib/opener/ners/base/version.rb
|
110
|
+
- models/de.bin
|
111
|
+
- models/en.bin
|
112
|
+
- models/es.bin
|
113
|
+
- models/fr.bin
|
114
|
+
- models/it.bin
|
115
|
+
- models/nl.bin
|
96
116
|
- opener-ner-base.gemspec
|
97
117
|
homepage: http://opener-project.github.com/
|
98
118
|
licenses:
|
@@ -104,18 +124,19 @@ require_paths:
|
|
104
124
|
- lib
|
105
125
|
required_ruby_version: !ruby/object:Gem::Requirement
|
106
126
|
requirements:
|
107
|
-
- -
|
127
|
+
- - ">="
|
108
128
|
- !ruby/object:Gem::Version
|
109
129
|
version: '0'
|
110
130
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
111
131
|
requirements:
|
112
|
-
- -
|
132
|
+
- - ">="
|
113
133
|
- !ruby/object:Gem::Version
|
114
134
|
version: '0'
|
115
135
|
requirements: []
|
116
136
|
rubyforge_project:
|
117
|
-
rubygems_version: 2.
|
137
|
+
rubygems_version: 2.4.8
|
118
138
|
signing_key:
|
119
139
|
specification_version: 4
|
120
140
|
summary: Base NER component for languages such as English.
|
121
141
|
test_files: []
|
142
|
+
has_rdoc:
|
Binary file
|