opener-ner-base 3.0.1 → 3.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +8 -3
- data/core/target/ixa-pipe-nerc-1.5.2.jar +0 -0
- data/lib/opener/ners/base.rb +54 -72
- data/lib/opener/ners/base/version.rb +1 -1
- data/models/de.bin +0 -0
- data/models/en.bin +0 -0
- data/models/es.bin +0 -0
- data/models/fr.bin +0 -0
- data/models/it.bin +0 -0
- data/models/nl.bin +0 -0
- data/opener-ner-base.gemspec +3 -1
- metadata +48 -27
- data/core/target/ixa-pipe-nerc-1.1.0.jar +0 -0
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 184b5a02199988f3fa5e798e9438b679966542e0
|
4
|
+
data.tar.gz: e60c01830e0f3e97b1a7489cb4c5acfdc9d3cd14
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: ca7ed76f8a08c1409ed6de52a2178e766994816ebf92e89821700b0f7cddf0b1e6c0f59e0c69009fbd9877149c84d00dc60efa7ab24be22b403ed4bbb3bbdaec
|
7
|
+
data.tar.gz: 7852dd522d262eb94f9482f4a0a81422ba66bd9b762d52a5bf54b79e3192161cd0039b53a2f70f075ece4a93bfb2d59dfd6c763235be188858dbe95dd9141f47
|
data/README.md
CHANGED
@@ -1,5 +1,3 @@
|
|
1
|
-
[![Build Status](https://drone.io/github.com/opener-project/ner-base/status.png)](https://drone.io/github.com/opener-project/ner-base/latest)
|
2
|
-
|
3
1
|
# NER Base
|
4
2
|
|
5
3
|
This repository contains the source code used for performing Named Entity
|
@@ -42,7 +40,14 @@ Using specific install:
|
|
42
40
|
|
43
41
|
## Usage
|
44
42
|
|
45
|
-
|
43
|
+
Basic usage:
|
44
|
+
|
45
|
+
cat some_input_file.kaf | ner-base
|
46
|
+
|
47
|
+
This component ships a built-in set of models. If you have your own models you
|
48
|
+
can set the environment variable `NER_BASE_MODELS_PATH` to the directory
|
49
|
+
containing your models. Each model should be named `LANGUAGE.bin` where
|
50
|
+
`LANGUAGE` is a 2-letter language code (`nl`, `en`, etc.).
|
46
51
|
|
47
52
|
## Contributing
|
48
53
|
|
Binary file
|
data/lib/opener/ners/base.rb
CHANGED
@@ -1,74 +1,36 @@
|
|
1
|
-
require 'open3'
|
2
1
|
require 'stringio'
|
3
|
-
require '
|
4
|
-
|
5
|
-
require File.expand_path("../../../../core/target/ixa-pipe-nerc-1.1.0.jar", __FILE__)
|
2
|
+
require 'oga'
|
3
|
+
require 'opener/core'
|
6
4
|
|
7
5
|
require_relative 'base/version'
|
8
6
|
|
7
|
+
require File.expand_path("../../../../core/target/ixa-pipe-nerc-1.5.2.jar", __FILE__)
|
8
|
+
|
9
9
|
module Opener
|
10
10
|
module Ners
|
11
11
|
##
|
12
12
|
# Base NER class that supports various languages such as Dutch and English.
|
13
13
|
#
|
14
|
-
# @!attribute [r] options
|
15
|
-
# @return [Hash]
|
16
|
-
#
|
17
|
-
# @!attribute [r] features
|
18
|
-
# @return [String]
|
19
|
-
#
|
20
|
-
# @!attribute [r] beamsize
|
21
|
-
# @return [Fixnum]
|
22
|
-
#
|
23
|
-
# @!attribute [r] dictionaries
|
24
|
-
# @return [String]
|
25
|
-
#
|
26
|
-
# @!attribute [r] dictionaries_path
|
27
|
-
# @return [String]
|
28
|
-
#
|
29
|
-
# @!attribute [r] lexer
|
30
|
-
# @return [Fixnum]
|
31
|
-
#
|
32
|
-
# @!attribute [r] model
|
33
|
-
# @return [String]
|
34
|
-
#
|
35
|
-
# @!attribute [r] enable_time
|
36
|
-
# @return [TrueClass|FalseClass]
|
37
|
-
#
|
38
14
|
class Base
|
39
|
-
|
40
|
-
|
15
|
+
# The default models directory.
|
16
|
+
MODELS_PATH = File.expand_path('../../../../models', __FILE__)
|
17
|
+
|
18
|
+
# @return [String]
|
19
|
+
attr_reader :models
|
20
|
+
|
21
|
+
# @return [TrueClass|FalseClass]
|
22
|
+
attr_reader :enable_time
|
41
23
|
|
42
24
|
##
|
43
25
|
# @param [Hash] options
|
44
26
|
#
|
45
|
-
# @option options [String] :features The NERC feature to use, defaults to
|
46
|
-
# "baseline".
|
47
|
-
#
|
48
|
-
# @option options [Fixnum] :beamsize The beam size for decoding, defaults
|
49
|
-
# to 3.
|
50
|
-
#
|
51
|
-
# @option options [String] :dictionaries The dictionary to use, if any.
|
52
|
-
#
|
53
|
-
# @option options [String] :dictionaries_path The path to the
|
54
|
-
# dictionaries.
|
55
|
-
#
|
56
|
-
# @option options [Fixnum] :lexer The lexer rules to use for NERC
|
57
|
-
# tagging.
|
58
|
-
#
|
59
|
-
# @option options [String] :model The model to use for NERC annotation.
|
60
|
-
#
|
61
27
|
# @option options [TrueClass|FalseClass] :enable_time Whether or not to
|
62
28
|
# enable dynamic timestamps (enabled by default).
|
63
29
|
#
|
64
30
|
def initialize(options = {})
|
65
|
-
@
|
66
|
-
|
67
|
-
@
|
68
|
-
@beamsize = options.fetch(:beamsize, 3)
|
69
|
-
@lexer = options[:lexer]
|
70
|
-
@model = options.fetch(:model, 'default')
|
71
|
-
@enable_time = options.fetch(:enable_time, true)
|
31
|
+
@models = ENV['NER_BASE_MODELS_PATH'] || MODELS_PATH
|
32
|
+
|
33
|
+
@enable_time = options.fetch(:enable_time, true)
|
72
34
|
end
|
73
35
|
|
74
36
|
##
|
@@ -79,19 +41,16 @@ module Opener
|
|
79
41
|
# @return [Array]
|
80
42
|
#
|
81
43
|
def run(input)
|
82
|
-
lang
|
83
|
-
|
84
|
-
args = [lang, model, features, beamsize]
|
44
|
+
lang = language_from_kaf(input)
|
45
|
+
model = File.join(models, "#{lang}.bin")
|
85
46
|
|
86
|
-
|
87
|
-
args += [dictionaries, dictionaries_path, lexer]
|
88
|
-
end
|
47
|
+
raise(Core::UnsupportedLanguageError, lang) unless File.file?(model)
|
89
48
|
|
90
|
-
|
49
|
+
kaf = new_kaf_document(input)
|
50
|
+
properties = build_properties(lang, model)
|
51
|
+
annotator = Java::eus.ixa.ixa.pipe.nerc.Annotate.new(properties)
|
91
52
|
|
92
53
|
annotator.annotate_kaf(enable_time, kaf)
|
93
|
-
|
94
|
-
return kaf.to_string
|
95
54
|
end
|
96
55
|
|
97
56
|
##
|
@@ -102,14 +61,7 @@ module Opener
|
|
102
61
|
input_io = StringIO.new(input)
|
103
62
|
reader = Java::java.io.InputStreamReader.new(input_io.to_inputstream)
|
104
63
|
|
105
|
-
|
106
|
-
end
|
107
|
-
|
108
|
-
##
|
109
|
-
# @return [TrueClass|FalseClass]
|
110
|
-
#
|
111
|
-
def use_dictionaries?
|
112
|
-
return dictionaries || dictionaries_path || features == 'dict'
|
64
|
+
Java::ixa.kaflib.KAFDocument.create_from_stream(reader)
|
113
65
|
end
|
114
66
|
|
115
67
|
##
|
@@ -119,9 +71,39 @@ module Opener
|
|
119
71
|
# @return [String]
|
120
72
|
#
|
121
73
|
def language_from_kaf(input)
|
122
|
-
|
74
|
+
parser = Oga::XML::PullParser.new(input)
|
75
|
+
language = nil
|
76
|
+
|
77
|
+
parser.parse do |node|
|
78
|
+
if node.is_a?(Oga::XML::Element) and node.name == 'KAF'
|
79
|
+
language = node.get('xml:lang')
|
80
|
+
break
|
81
|
+
end
|
82
|
+
end
|
83
|
+
|
84
|
+
# Make sure nobody can _somehow_ inject a language such as "../../foo".
|
85
|
+
unless language =~ /\A[a-zA-Z\-_]+\z/
|
86
|
+
raise Core::UnsupportedLanguageError, language
|
87
|
+
end
|
88
|
+
|
89
|
+
language
|
90
|
+
end
|
91
|
+
|
92
|
+
private
|
93
|
+
|
94
|
+
# @param [String] language
|
95
|
+
# @param [String] model
|
96
|
+
def build_properties(language, model)
|
97
|
+
properties = Java::java.util.Properties.new
|
98
|
+
|
99
|
+
properties.set_property('language', language)
|
100
|
+
properties.set_property('model', model)
|
101
|
+
properties.set_property('ruleBasedOption', 'off')
|
102
|
+
properties.set_property('dictTag', 'off')
|
103
|
+
properties.set_property('dictPath', 'off')
|
104
|
+
properties.set_property('clearFeatures', 'no')
|
123
105
|
|
124
|
-
|
106
|
+
properties
|
125
107
|
end
|
126
108
|
end # Base
|
127
109
|
end # Ners
|
data/models/de.bin
ADDED
Binary file
|
data/models/en.bin
ADDED
Binary file
|
data/models/es.bin
ADDED
Binary file
|
data/models/fr.bin
ADDED
Binary file
|
data/models/it.bin
ADDED
Binary file
|
data/models/nl.bin
ADDED
Binary file
|
data/opener-ner-base.gemspec
CHANGED
@@ -12,6 +12,7 @@ Gem::Specification.new do |gem|
|
|
12
12
|
gem.files = Dir.glob([
|
13
13
|
'core/target/ixa-pipe-nerc-*.jar',
|
14
14
|
'lib/**/*',
|
15
|
+
'models/**/*',
|
15
16
|
'*.gemspec',
|
16
17
|
'README.md',
|
17
18
|
'LICENSE.txt'
|
@@ -19,7 +20,8 @@ Gem::Specification.new do |gem|
|
|
19
20
|
|
20
21
|
gem.executables = Dir.glob('bin/*').map { |file| File.basename(file) }
|
21
22
|
|
22
|
-
gem.add_dependency '
|
23
|
+
gem.add_dependency 'oga'
|
24
|
+
gem.add_dependency 'opener-core'
|
23
25
|
|
24
26
|
gem.add_development_dependency 'rspec', '~> 3.0'
|
25
27
|
gem.add_development_dependency 'cucumber'
|
metadata
CHANGED
@@ -1,85 +1,99 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: opener-ner-base
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 3.0
|
4
|
+
version: 3.1.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- development@olery.com
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2015-09-17 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
|
-
|
14
|
+
requirement: !ruby/object:Gem::Requirement
|
15
|
+
requirements:
|
16
|
+
- - ">="
|
17
|
+
- !ruby/object:Gem::Version
|
18
|
+
version: '0'
|
19
|
+
name: oga
|
20
|
+
prerelease: false
|
21
|
+
type: :runtime
|
15
22
|
version_requirements: !ruby/object:Gem::Requirement
|
16
23
|
requirements:
|
17
|
-
- -
|
24
|
+
- - ">="
|
18
25
|
- !ruby/object:Gem::Version
|
19
26
|
version: '0'
|
27
|
+
- !ruby/object:Gem::Dependency
|
20
28
|
requirement: !ruby/object:Gem::Requirement
|
21
29
|
requirements:
|
22
|
-
- -
|
30
|
+
- - ">="
|
23
31
|
- !ruby/object:Gem::Version
|
24
32
|
version: '0'
|
33
|
+
name: opener-core
|
25
34
|
prerelease: false
|
26
35
|
type: :runtime
|
27
|
-
- !ruby/object:Gem::Dependency
|
28
|
-
name: rspec
|
29
36
|
version_requirements: !ruby/object:Gem::Requirement
|
30
37
|
requirements:
|
31
|
-
- -
|
38
|
+
- - ">="
|
32
39
|
- !ruby/object:Gem::Version
|
33
|
-
version: '
|
40
|
+
version: '0'
|
41
|
+
- !ruby/object:Gem::Dependency
|
34
42
|
requirement: !ruby/object:Gem::Requirement
|
35
43
|
requirements:
|
36
|
-
- - ~>
|
44
|
+
- - "~>"
|
37
45
|
- !ruby/object:Gem::Version
|
38
46
|
version: '3.0'
|
47
|
+
name: rspec
|
39
48
|
prerelease: false
|
40
49
|
type: :development
|
41
|
-
- !ruby/object:Gem::Dependency
|
42
|
-
name: cucumber
|
43
50
|
version_requirements: !ruby/object:Gem::Requirement
|
44
51
|
requirements:
|
45
|
-
- -
|
52
|
+
- - "~>"
|
46
53
|
- !ruby/object:Gem::Version
|
47
|
-
version: '0'
|
54
|
+
version: '3.0'
|
55
|
+
- !ruby/object:Gem::Dependency
|
48
56
|
requirement: !ruby/object:Gem::Requirement
|
49
57
|
requirements:
|
50
|
-
- -
|
58
|
+
- - ">="
|
51
59
|
- !ruby/object:Gem::Version
|
52
60
|
version: '0'
|
61
|
+
name: cucumber
|
53
62
|
prerelease: false
|
54
63
|
type: :development
|
55
|
-
- !ruby/object:Gem::Dependency
|
56
|
-
name: rake
|
57
64
|
version_requirements: !ruby/object:Gem::Requirement
|
58
65
|
requirements:
|
59
|
-
- -
|
66
|
+
- - ">="
|
60
67
|
- !ruby/object:Gem::Version
|
61
68
|
version: '0'
|
69
|
+
- !ruby/object:Gem::Dependency
|
62
70
|
requirement: !ruby/object:Gem::Requirement
|
63
71
|
requirements:
|
64
|
-
- -
|
72
|
+
- - ">="
|
65
73
|
- !ruby/object:Gem::Version
|
66
74
|
version: '0'
|
75
|
+
name: rake
|
67
76
|
prerelease: false
|
68
77
|
type: :development
|
69
|
-
- !ruby/object:Gem::Dependency
|
70
|
-
name: cliver
|
71
78
|
version_requirements: !ruby/object:Gem::Requirement
|
72
79
|
requirements:
|
73
|
-
- -
|
80
|
+
- - ">="
|
74
81
|
- !ruby/object:Gem::Version
|
75
82
|
version: '0'
|
83
|
+
- !ruby/object:Gem::Dependency
|
76
84
|
requirement: !ruby/object:Gem::Requirement
|
77
85
|
requirements:
|
78
|
-
- -
|
86
|
+
- - ">="
|
79
87
|
- !ruby/object:Gem::Version
|
80
88
|
version: '0'
|
89
|
+
name: cliver
|
81
90
|
prerelease: false
|
82
91
|
type: :development
|
92
|
+
version_requirements: !ruby/object:Gem::Requirement
|
93
|
+
requirements:
|
94
|
+
- - ">="
|
95
|
+
- !ruby/object:Gem::Version
|
96
|
+
version: '0'
|
83
97
|
description: Base NER component for languages such as English.
|
84
98
|
email:
|
85
99
|
executables:
|
@@ -90,9 +104,15 @@ files:
|
|
90
104
|
- LICENSE.txt
|
91
105
|
- README.md
|
92
106
|
- bin/ner-base
|
93
|
-
- core/target/ixa-pipe-nerc-1.
|
107
|
+
- core/target/ixa-pipe-nerc-1.5.2.jar
|
94
108
|
- lib/opener/ners/base.rb
|
95
109
|
- lib/opener/ners/base/version.rb
|
110
|
+
- models/de.bin
|
111
|
+
- models/en.bin
|
112
|
+
- models/es.bin
|
113
|
+
- models/fr.bin
|
114
|
+
- models/it.bin
|
115
|
+
- models/nl.bin
|
96
116
|
- opener-ner-base.gemspec
|
97
117
|
homepage: http://opener-project.github.com/
|
98
118
|
licenses:
|
@@ -104,18 +124,19 @@ require_paths:
|
|
104
124
|
- lib
|
105
125
|
required_ruby_version: !ruby/object:Gem::Requirement
|
106
126
|
requirements:
|
107
|
-
- -
|
127
|
+
- - ">="
|
108
128
|
- !ruby/object:Gem::Version
|
109
129
|
version: '0'
|
110
130
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
111
131
|
requirements:
|
112
|
-
- -
|
132
|
+
- - ">="
|
113
133
|
- !ruby/object:Gem::Version
|
114
134
|
version: '0'
|
115
135
|
requirements: []
|
116
136
|
rubyforge_project:
|
117
|
-
rubygems_version: 2.
|
137
|
+
rubygems_version: 2.4.8
|
118
138
|
signing_key:
|
119
139
|
specification_version: 4
|
120
140
|
summary: Base NER component for languages such as English.
|
121
141
|
test_files: []
|
142
|
+
has_rdoc:
|
Binary file
|