mitier 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.env.example +1 -0
- data/.gitignore +11 -0
- data/.rspec +2 -0
- data/.rubocop.yml +2 -0
- data/.travis.yml +4 -0
- data/Gemfile +4 -0
- data/LICENSE.txt +21 -0
- data/README.md +60 -0
- data/Rakefile +6 -0
- data/bin/console +9 -0
- data/bin/setup +8 -0
- data/lib/mitier.rb +9 -0
- data/lib/mitier/extractor.rb +21 -0
- data/lib/mitier/ner.rb +51 -0
- data/lib/mitier/tokenizer.rb +30 -0
- data/lib/mitier/version.rb +3 -0
- data/lib/mitier/wrapper.rb +17 -0
- data/mitier.gemspec +29 -0
- metadata +160 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: f2068109131d7766f5798f6a8e8f50998aa5e712
|
4
|
+
data.tar.gz: 72459c0c285b85588347c8eb37c7f22d7e280810
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: a71ede38f13145faef3e84615a08059b01e328464a071ce450e3308170873ef0c016fe2d7e77a526fe3ff2f050c5bd9854ee45f0b4254630835ca3da7a493ec1
|
7
|
+
data.tar.gz: 0cfb3453e973f5f6f3dad77de1068992dcc771d03a89607d57442ed7a4f180bd58b0b5c17f4a1442e9bd3c9ca558e8660787c025ad03ea4f3570438b513a62eb
|
data/.env.example
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
TEST_MODEL_PATH=./MITIE-models/english/ner_model.dat
|
data/.gitignore
ADDED
data/.rspec
ADDED
data/.rubocop.yml
ADDED
data/.travis.yml
ADDED
data/Gemfile
ADDED
data/LICENSE.txt
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
The MIT License (MIT)
|
2
|
+
|
3
|
+
Copyright (c) 2016 Marko Satek
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
7
|
+
in the Software without restriction, including without limitation the rights
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
10
|
+
furnished to do so, subject to the following conditions:
|
11
|
+
|
12
|
+
The above copyright notice and this permission notice shall be included in
|
13
|
+
all copies or substantial portions of the Software.
|
14
|
+
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
21
|
+
THE SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,60 @@
|
|
1
|
+
# Mitier
|
2
|
+
|
3
|
+
Simple FFI wrapper for MIT's MITIE library. Currently it wraps only the named entity
|
4
|
+
extraction part.
|
5
|
+
|
6
|
+
For details on what MITIE is and does, visit [its GitHub page](https://github.com/mit-nlp/MITIE).
|
7
|
+
|
8
|
+
## Installation
|
9
|
+
|
10
|
+
Add this line to your application's Gemfile:
|
11
|
+
|
12
|
+
```ruby
|
13
|
+
gem 'mitier'
|
14
|
+
```
|
15
|
+
|
16
|
+
And then execute:
|
17
|
+
|
18
|
+
$ bundle
|
19
|
+
|
20
|
+
Or install it yourself as:
|
21
|
+
|
22
|
+
$ gem install mitier
|
23
|
+
|
24
|
+
## Usage
|
25
|
+
|
26
|
+
To use the gem, make sure you have MITIE compiled and the libmitie shared library placed where the system can find it.
|
27
|
+
Also, you are going to need trained models. Download links can be found on the [MITIE GitHub page](https://github.com/mit-nlp/MITIE).
|
28
|
+
|
29
|
+
To run named entity recognition:
|
30
|
+
|
31
|
+
```ruby
|
32
|
+
extractor = Mitier::Extractor.new(TRAINED_MODEL_PATH).load
|
33
|
+
extractor.process_ner SOME_TEXT
|
34
|
+
```
|
35
|
+
|
36
|
+
If you only want to run text tokenizer:
|
37
|
+
|
38
|
+
```ruby
|
39
|
+
tokenizer = Mitier::Tokenizer.new SOME_TEXT
|
40
|
+
tokenizer.process
|
41
|
+
```
|
42
|
+
|
43
|
+
## Development
|
44
|
+
|
45
|
+
To run the specs, set the `TEST_MODEL_PATH` environment variable and then run `bundle exec rspec`. Environment variables are loaded with Dotenv, so you need a `.env` file that defines that variable. There is a `.env.example` in the repo.
|
46
|
+
|
47
|
+
You can also run `bin/console` for an interactive prompt that will allow you to experiment.
|
48
|
+
|
49
|
+
To install this gem onto your local machine, run `bundle exec rake install`.
|
50
|
+
|
51
|
+
|
52
|
+
## Contributing
|
53
|
+
|
54
|
+
Bug reports and pull requests are welcome on GitHub at https://github.com/satek/mitier.
|
55
|
+
|
56
|
+
|
57
|
+
## License
|
58
|
+
|
59
|
+
The gem is available as open source under the terms of the [MIT License](http://opensource.org/licenses/MIT).
|
60
|
+
|
data/Rakefile
ADDED
data/bin/console
ADDED
data/bin/setup
ADDED
data/lib/mitier.rb
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
module Mitier
  # Holds the path to a trained model, loads it through the native wrapper
  # and runs named entity recognition with the loaded handle.
  class Extractor
    attr_accessor :path

    # Raised when recognition is requested without a usable model handle.
    # Inherits from StandardError so that a plain `rescue` catches it.
    class ModelNotLoaded < StandardError; end

    # @param path [String] location of the trained model file
    def initialize(path)
      @path = path
    end

    # Asks the native library to load the model stored at +path+ and keeps
    # the returned handle for later calls.
    #
    # @return [Extractor] self, so the call can be chained
    def load
      @extractor = Mitier::Wrapper.mitie_load_named_entity_extractor @path
      self
    end

    # Runs named entity recognition over +text+.
    #
    # @param text [String] text to analyse
    # @return [Object] whatever Ner#process returns for the given text
    # @raise [ModelNotLoaded] if #load was never called or produced a null
    #   handle
    def process_ner(text)
      raise ModelNotLoaded unless model_loaded?
      Ner.new(@extractor, text).process
    end

    private

    # A null pointer object is still truthy in Ruby, so a truthiness check
    # alone would let a failed load through to native code.
    def model_loaded?
      return false unless @extractor
      !(@extractor.respond_to?(:null?) && @extractor.null?)
    end
  end
end
|
data/lib/mitier/ner.rb
ADDED
@@ -0,0 +1,51 @@
|
|
1
|
+
module Mitier
  # Runs named entity recognition on a text: tokenizes it, asks the native
  # extractor for detections and exposes both as Ruby values.
  class Ner < Tokenizer
    attr_accessor :detections

    # @param extractor [Object] handle passed to the native extraction call
    # @param text [#to_s] text to analyse (normalised by Tokenizer)
    def initialize(extractor, text)
      super text
      @extractor = extractor
    end

    # Tokenizes the text, extracts entities and fills #tokens and
    # #detections. For empty text both are set to empty arrays and no native
    # call is made.
    #
    # @return [Ner] self
    def process
      # The block performs a non-local return out of #process for empty text
      check_text { return self }
      tokens_ptr = tokenize
      detections_ptr = detect tokens_ptr
      process_token_elements tokens_ptr
      process_detections detections_ptr
      self
    end

    private

    attr_accessor :extractor

    # Hands the token pointer to the native entity extraction call.
    def detect(tokens_ptr)
      Wrapper.mitie_extract_entities extractor, tokens_ptr
    end

    # Builds one attribute hash per detection reported for +ptr+.
    def process_detections(ptr)
      num = Wrapper.mitie_ner_get_num_detections ptr
      @detections = (0...num).map { |elem| detection_attrs(ptr, elem) }
    end

    # @return [Hash] :tokens, :tagstr, :tag and :score of detection +nr+
    def detection_attrs(ptr, nr)
      { tokens: detection_tokens(ptr, nr),
        tagstr: Wrapper.mitie_ner_get_detection_tagstr(ptr, nr),
        tag: Wrapper.mitie_ner_get_detection_tag(ptr, nr),
        score: Wrapper.mitie_ner_get_detection_score(ptr, nr) }
    end

    # Looks up the tokens covered by detection +nr+ using its position and
    # length.
    def detection_tokens(ptr, nr)
      pos = Wrapper.mitie_ner_get_detection_position ptr, nr
      len = Wrapper.mitie_ner_get_detection_length ptr, nr
      (pos...(pos + len)).map { |elem| tokens[elem] }
    end

    # Yields only when the text is empty, after resetting the results.
    def check_text
      return unless text.empty?
      # Two separate arrays: a chained assignment would make #tokens and
      # #detections the same object, so mutating one would change the other
      @tokens = []
      @detections = []
      yield
    end
  end
end
|
@@ -0,0 +1,30 @@
|
|
1
|
+
module Mitier
  # Splits a text into tokens by calling the native tokenizer through
  # Wrapper and reading the returned list of strings.
  class Tokenizer
    attr_accessor :tokens, :text

    # @param text [#to_s] input text; it is converted to a String and
    #   stripped of surrounding whitespace
    def initialize(text)
      @text = text.to_s.strip
    end

    # Tokenizes the text and stores the result in #tokens.
    #
    # @return [Array<String>] the tokens; an empty array for empty text
    def process
      # Store the empty result as well so #tokens matches the return value
      # instead of staying nil
      return @tokens = [] if text.empty?
      tokens_ptr = tokenize
      process_token_elements tokens_ptr
    end

    private

    # Calls the native tokenizer on the stored text.
    def tokenize
      Wrapper.mitie_tokenize text
    end

    # Reads strings from consecutive pointer slots starting at +ptr+ until a
    # null entry is found, and stores them in @tokens.
    def process_token_elements(ptr)
      @tokens = [].tap do |elements|
        until (element = ptr.read_pointer).null?
          elements << element.read_string
          # Step to the next slot of the pointer list
          ptr += FFI::Type::POINTER.size
        end
      end
    end
  end
end
|
@@ -0,0 +1,17 @@
|
|
1
|
+
module Mitier
  # FFI bindings for the functions of the native 'mitie' library that the
  # gem calls. Each attach_function line lists the argument types followed
  # by the return type.
  module Wrapper
    extend FFI::Library
    ffi_lib 'mitie'
    attach_function :mitie_tokenize_file, [:string], :pointer
    attach_function :mitie_tokenize, [:string], :pointer
    attach_function :mitie_load_named_entity_extractor, [:string], :pointer
    attach_function :mitie_extract_entities, [:pointer, :pointer], :pointer
    attach_function :mitie_ner_get_num_detections, [:pointer], :ulong
    attach_function :mitie_ner_get_detection_tagstr, [:pointer, :ulong], :string
    attach_function :mitie_ner_get_detection_tag, [:pointer, :ulong], :ulong
    # MITIE's C header declares the score as a double; binding it as :float
    # would misinterpret the returned value
    attach_function :mitie_ner_get_detection_score, [:pointer, :ulong], :double
    attach_function :mitie_ner_get_detection_length, [:pointer, :ulong], :ulong
    attach_function :mitie_ner_get_detection_position,
                    [:pointer, :ulong], :ulong
  end
end
|
data/mitier.gemspec
ADDED
@@ -0,0 +1,29 @@
|
|
1
|
+
# coding: utf-8
|
2
|
+
lib = File.expand_path('../lib', __FILE__)
|
3
|
+
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
|
4
|
+
require 'mitier/version'
|
5
|
+
|
6
|
+
Gem::Specification.new do |spec|
|
7
|
+
spec.name = 'mitier'
|
8
|
+
spec.version = Mitier::VERSION
|
9
|
+
spec.authors = ['Marko Satek']
|
10
|
+
spec.email = ['satekm@gmail.com']
|
11
|
+
|
12
|
+
spec.summary = 'MITIE library wrapped in Ruby with FFI. See https://github.com/mit-nlp/MITIE'
|
13
|
+
spec.homepage = 'http://github.com/satek/mitier'
|
14
|
+
spec.license = 'MIT'
|
15
|
+
|
16
|
+
spec.files = `git ls-files -z`
|
17
|
+
.split("\x0")
|
18
|
+
.reject { |f| f.match(%r{^(test|spec|features)/}) }
|
19
|
+
spec.require_paths = ['lib']
|
20
|
+
|
21
|
+
spec.add_dependency 'ffi', '~> 1.9.10'
|
22
|
+
|
23
|
+
spec.add_development_dependency 'bundler', '~> 1.11'
|
24
|
+
spec.add_development_dependency 'rake', '~> 10.0'
|
25
|
+
spec.add_development_dependency 'rspec', '~> 3.0'
|
26
|
+
spec.add_development_dependency 'pry', '~> 0.10.3'
|
27
|
+
spec.add_development_dependency 'dotenv', '~> 2.1.1'
|
28
|
+
spec.add_development_dependency 'rubocop', '~> 0.39.0'
|
29
|
+
end
|
metadata
ADDED
@@ -0,0 +1,160 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: mitier
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.1.0
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Marko Satek
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2016-05-01 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: ffi
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - "~>"
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: 1.9.10
|
20
|
+
type: :runtime
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - "~>"
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: 1.9.10
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: bundler
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - "~>"
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '1.11'
|
34
|
+
type: :development
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - "~>"
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '1.11'
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: rake
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - "~>"
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: '10.0'
|
48
|
+
type: :development
|
49
|
+
prerelease: false
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - "~>"
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: '10.0'
|
55
|
+
- !ruby/object:Gem::Dependency
|
56
|
+
name: rspec
|
57
|
+
requirement: !ruby/object:Gem::Requirement
|
58
|
+
requirements:
|
59
|
+
- - "~>"
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: '3.0'
|
62
|
+
type: :development
|
63
|
+
prerelease: false
|
64
|
+
version_requirements: !ruby/object:Gem::Requirement
|
65
|
+
requirements:
|
66
|
+
- - "~>"
|
67
|
+
- !ruby/object:Gem::Version
|
68
|
+
version: '3.0'
|
69
|
+
- !ruby/object:Gem::Dependency
|
70
|
+
name: pry
|
71
|
+
requirement: !ruby/object:Gem::Requirement
|
72
|
+
requirements:
|
73
|
+
- - "~>"
|
74
|
+
- !ruby/object:Gem::Version
|
75
|
+
version: 0.10.3
|
76
|
+
type: :development
|
77
|
+
prerelease: false
|
78
|
+
version_requirements: !ruby/object:Gem::Requirement
|
79
|
+
requirements:
|
80
|
+
- - "~>"
|
81
|
+
- !ruby/object:Gem::Version
|
82
|
+
version: 0.10.3
|
83
|
+
- !ruby/object:Gem::Dependency
|
84
|
+
name: dotenv
|
85
|
+
requirement: !ruby/object:Gem::Requirement
|
86
|
+
requirements:
|
87
|
+
- - "~>"
|
88
|
+
- !ruby/object:Gem::Version
|
89
|
+
version: 2.1.1
|
90
|
+
type: :development
|
91
|
+
prerelease: false
|
92
|
+
version_requirements: !ruby/object:Gem::Requirement
|
93
|
+
requirements:
|
94
|
+
- - "~>"
|
95
|
+
- !ruby/object:Gem::Version
|
96
|
+
version: 2.1.1
|
97
|
+
- !ruby/object:Gem::Dependency
|
98
|
+
name: rubocop
|
99
|
+
requirement: !ruby/object:Gem::Requirement
|
100
|
+
requirements:
|
101
|
+
- - "~>"
|
102
|
+
- !ruby/object:Gem::Version
|
103
|
+
version: 0.39.0
|
104
|
+
type: :development
|
105
|
+
prerelease: false
|
106
|
+
version_requirements: !ruby/object:Gem::Requirement
|
107
|
+
requirements:
|
108
|
+
- - "~>"
|
109
|
+
- !ruby/object:Gem::Version
|
110
|
+
version: 0.39.0
|
111
|
+
description:
|
112
|
+
email:
|
113
|
+
- satekm@gmail.com
|
114
|
+
executables: []
|
115
|
+
extensions: []
|
116
|
+
extra_rdoc_files: []
|
117
|
+
files:
|
118
|
+
- ".env.example"
|
119
|
+
- ".gitignore"
|
120
|
+
- ".rspec"
|
121
|
+
- ".rubocop.yml"
|
122
|
+
- ".travis.yml"
|
123
|
+
- Gemfile
|
124
|
+
- LICENSE.txt
|
125
|
+
- README.md
|
126
|
+
- Rakefile
|
127
|
+
- bin/console
|
128
|
+
- bin/setup
|
129
|
+
- lib/mitier.rb
|
130
|
+
- lib/mitier/extractor.rb
|
131
|
+
- lib/mitier/ner.rb
|
132
|
+
- lib/mitier/tokenizer.rb
|
133
|
+
- lib/mitier/version.rb
|
134
|
+
- lib/mitier/wrapper.rb
|
135
|
+
- mitier.gemspec
|
136
|
+
homepage: http://github.com/satek/mitier
|
137
|
+
licenses:
|
138
|
+
- MIT
|
139
|
+
metadata: {}
|
140
|
+
post_install_message:
|
141
|
+
rdoc_options: []
|
142
|
+
require_paths:
|
143
|
+
- lib
|
144
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
145
|
+
requirements:
|
146
|
+
- - ">="
|
147
|
+
- !ruby/object:Gem::Version
|
148
|
+
version: '0'
|
149
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
150
|
+
requirements:
|
151
|
+
- - ">="
|
152
|
+
- !ruby/object:Gem::Version
|
153
|
+
version: '0'
|
154
|
+
requirements: []
|
155
|
+
rubyforge_project:
|
156
|
+
rubygems_version: 2.5.1
|
157
|
+
signing_key:
|
158
|
+
specification_version: 4
|
159
|
+
summary: MITIE library wrapped in Ruby with FFI. See https://github.com/mit-nlp/MITIE
|
160
|
+
test_files: []
|