mitie 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: ea3ef115016c59ecb496ffbbe13c4ac3a2ffda6acf9392ac423103b9c3cfe634
4
+ data.tar.gz: 6eb77dd514ba3c08c30e1216921cd83619f206a658018c1d4522c598e175e8b2
5
+ SHA512:
6
+ metadata.gz: 682fb3ea1c0be1889f2e1e177204309ea7b6d2989834a4d2bae49ddf567e309b9fef9ea872746626590ec06b9aabb455067d88e696ea3a4f59a9b785b43819c9
7
+ data.tar.gz: 4260a6dff4eb613278468d9fff2ef19f99f49bed81571781b4d2ac886f39bed9356a4a20a9220fa2b31e8c12965bffac5e86b40318b28aa450ebd60a3c53c3cd
@@ -0,0 +1,3 @@
1
+ ## 0.1.0 (2020-09-14)
2
+
3
+ - First release
@@ -0,0 +1,23 @@
1
+ Boost Software License - Version 1.0 - August 17th, 2003
2
+
3
+ Permission is hereby granted, free of charge, to any person or organization
4
+ obtaining a copy of the software and accompanying documentation covered by
5
+ this license (the "Software") to use, reproduce, display, distribute,
6
+ execute, and transmit the Software, and to prepare derivative works of the
7
+ Software, and to permit third-parties to whom the Software is furnished to
8
+ do so, all subject to the following:
9
+
10
+ The copyright notices in the Software and this entire statement, including
11
+ the above license grant, this restriction and the following disclaimer,
12
+ must be included in all copies of the Software, in whole or in part, and
13
+ all derivative works of the Software, unless such copies or derivative
14
+ works are solely in the form of machine-executable object code generated by
15
+ a source language processor.
16
+
17
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19
+ FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT
20
+ SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE
21
+ FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE,
22
+ ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
23
+ DEALINGS IN THE SOFTWARE.
@@ -0,0 +1,93 @@
1
+ # MITIE
2
+
3
+ [MITIE](https://github.com/mit-nlp/MITIE) - named-entity recognition - for Ruby
4
+
5
+ ## Installation
6
+
7
+ First, install MITIE. For Homebrew, use:
8
+
9
+ ```sh
10
+ brew install mitie
11
+ ```
12
+
13
+ Add this line to your application’s Gemfile:
14
+
15
+ ```ruby
16
+ gem 'mitie'
17
+ ```
18
+
19
+ And download the pre-trained model for your language:
20
+
21
+ - [English](https://github.com/mit-nlp/MITIE/releases/download/v0.4/MITIE-models-v0.2.tar.bz2)
22
+ - [Spanish](https://github.com/mit-nlp/MITIE/releases/download/v0.4/MITIE-models-v0.2-Spanish.zip)
23
+ - [German](https://github.com/mit-nlp/MITIE/releases/download/v0.4/MITIE-models-v0.2-German.tar.bz2)
24
+
25
+ ## Getting Started
26
+
27
+ Get your text
28
+
29
+ ```ruby
30
+ text = "Nat Friedman is the CEO of GitHub, which is headquartered in San Francisco"
31
+ ```
32
+
33
+ Load an NER model
34
+
35
+ ```ruby
36
+ model = Mitie::NER.new("ner_model.dat")
37
+ ```
38
+
39
+ Get entities
40
+
41
+ ```ruby
42
+ model.entities(text)
43
+ ```
44
+
45
+ This returns
46
+
47
+ ```ruby
48
+ [
49
+ {text: "Nat Friedman", tag: "PERSON", score: 1.099661347535191, offset: 0},
50
+ {text: "GitHub", tag: "ORGANIZATION", score: 0.344641651251650, offset: 27},
51
+ {text: "San Francisco", tag: "LOCATION", score: 1.428241888939011, offset: 61}
52
+ ]
53
+ ```
54
+
55
+ Get tokens
56
+
57
+ ```ruby
58
+ model.tokens(text)
59
+ ```
60
+
61
+ Get tokens and their offsets
62
+
63
+ ```ruby
64
+ model.tokens_with_offset(text)
65
+ ```
66
+
67
+ Get all tags for a model
68
+
69
+ ```ruby
70
+ model.tags
71
+ ```
72
+
73
+ ## History
74
+
75
+ View the [changelog](https://github.com/ankane/mitie/blob/master/CHANGELOG.md)
76
+
77
+ ## Contributing
78
+
79
+ Everyone is encouraged to help improve this project. Here are a few ways you can help:
80
+
81
+ - [Report bugs](https://github.com/ankane/mitie/issues)
82
+ - Fix bugs and [submit pull requests](https://github.com/ankane/mitie/pulls)
83
+ - Write, clarify, or fix documentation
84
+ - Suggest or add new features
85
+
86
+ To get started with development:
87
+
88
+ ```sh
89
+ git clone https://github.com/ankane/mitie.git
90
+ cd mitie
91
+ bundle install
92
+ MITIE_NER_PATH=path/to/ner_model.dat bundle exec rake test
93
+ ```
@@ -0,0 +1,25 @@
1
+ # stdlib
2
+ require "fiddle/import"
3
+
4
+ # modules
5
+ require "mitie/ner"
6
+ require "mitie/version"
7
+
8
module Mitie
  # Base class for errors raised by this library.
  class Error < StandardError
  end

  # Mitie.ffi_lib / Mitie.ffi_lib= hold the list of shared-library names
  # that Mitie::FFI attempts to open, in order, when it is first loaded.
  singleton_class.attr_accessor :ffi_lib

  # Default library name for the current platform.
  self.ffi_lib =
    case
    when Gem.win_platform?
      ["mitie.dll"]
    when RbConfig::CONFIG["host_os"] =~ /darwin/i
      ["libmitie.dylib"]
    else
      ["libmitie.so"]
    end

  # friendlier error message
  # (mitie/ffi is only required on first reference to Mitie::FFI, so the
  # native library is not opened while this file is being loaded)
  autoload :FFI, "mitie/ffi"
end
@@ -0,0 +1,29 @@
1
module Mitie
  # Fiddle bindings for the MITIE C API functions used by this gem.
  module FFI
    extend Fiddle::Importer

    # Try each configured library name in turn; the error from the last
    # attempt is re-raised once no candidates remain.
    candidates = Mitie.ffi_lib.dup
    begin
      dlload Fiddle.dlopen(candidates.shift)
    rescue Fiddle::DLError => error
      retry unless candidates.empty?
      raise error
    end

    # C signatures, declared in the same order as before: general helpers,
    # model loading / tag lookup, then entity extraction and detection access.
    [
      "void mitie_free(void* object)",
      "char** mitie_tokenize(const char* text)",
      "char** mitie_tokenize_with_offsets(const char* text, unsigned long** token_offsets)",

      "mitie_named_entity_extractor* mitie_load_named_entity_extractor(const char* filename)",
      "unsigned long mitie_get_num_possible_ner_tags(const mitie_named_entity_extractor* ner)",
      "const char* mitie_get_named_entity_tagstr(const mitie_named_entity_extractor* ner, unsigned long idx)",

      "mitie_named_entity_detections* mitie_extract_entities(const mitie_named_entity_extractor* ner, char** tokens)",
      "unsigned long mitie_ner_get_num_detections(const mitie_named_entity_detections* dets)",
      "unsigned long mitie_ner_get_detection_position(const mitie_named_entity_detections* dets, unsigned long idx)",
      "unsigned long mitie_ner_get_detection_length(const mitie_named_entity_detections* dets, unsigned long idx)",
      "unsigned long mitie_ner_get_detection_tag(const mitie_named_entity_detections* dets, unsigned long idx)",
      "const char* mitie_ner_get_detection_tagstr(const mitie_named_entity_detections* dets, unsigned long idx)",
      "double mitie_ner_get_detection_score(const mitie_named_entity_detections* dets, unsigned long idx)"
    ].each { |signature| extern signature }
  end
end
@@ -0,0 +1,91 @@
1
module Mitie
  # Named-entity recognizer backed by a MITIE model loaded through Mitie::FFI.
  class NER
    # Loads the NER model stored at +path+ and registers a finalizer that
    # releases the native model with mitie_free.
    #
    # @param path [String] path to a MITIE NER model file
    # @raise [ArgumentError] if no file exists at +path+
    def initialize(path)
      # Fail early with a Ruby exception instead of handing a bad path to the
      # native loader and keeping whatever pointer it returns.
      raise ArgumentError, "File does not exist" unless File.exist?(path)

      @pointer = FFI.mitie_load_named_entity_extractor(path)
      ObjectSpace.define_finalizer(self, self.class.finalize(pointer))
    end

    # @return [Array<String>] every tag name the loaded model can produce
    def tags
      FFI.mitie_get_num_possible_ner_tags(pointer).times.map do |i|
        FFI.mitie_get_named_entity_tagstr(pointer, i).to_s
      end
    end

    # Splits +text+ into tokens.
    #
    # @param text [String]
    # @return [Array<String>] tokens, tagged with the encoding of +text+
    def tokens(text)
      ptr = FFI.mitie_tokenize(text)
      tokens = []
      each_token(ptr) do |token, _index|
        tokens << token.to_s.force_encoding(text.encoding)
      end
      tokens
    ensure
      FFI.mitie_free(ptr) if ptr
    end

    # Splits +text+ into tokens and reports where each one starts.
    #
    # @param text [String]
    # @return [Array<Array(String, Integer)>] [token, character offset] pairs
    def tokens_with_offset(text)
      tokens, ptr = tokens_with_offset_with_ptr(text)
      tokens
    ensure
      FFI.mitie_free(ptr) if ptr
    end

    # Runs entity extraction over +text+.
    #
    # @param text [String]
    # @return [Array<Hash>] one hash per detection with the keys
    #   :text, :tag, :score and :offset (character offset into +text+)
    def entities(text)
      entities = []
      tokens, tokens_ptr = tokens_with_offset_with_ptr(text)
      detections = FFI.mitie_extract_entities(pointer, tokens_ptr)
      num_detections = FFI.mitie_ner_get_num_detections(detections)
      num_detections.times do |i|
        pos = FFI.mitie_ner_get_detection_position(detections, i)
        len = FFI.mitie_ner_get_detection_length(detections, i)
        tag = FFI.mitie_ner_get_detection_tagstr(detections, i).to_s
        score = FFI.mitie_ner_get_detection_score(detections, i)
        # the detection covers tokens pos...(pos + len); the entity text runs
        # from the start of the first token to the end of the last one
        tok = tokens[pos, len]
        offset = tok[0][1]
        finish = tok[-1][1] + tok[-1][0].size
        entities << {
          text: text[offset...finish],
          tag: tag,
          score: score,
          offset: offset
        }
      end
      entities
    ensure
      FFI.mitie_free(tokens_ptr) if tokens_ptr
      FFI.mitie_free(detections) if detections
    end

    # Builds the finalizer for a native model pointer.
    #
    # @param pointer [Fiddle::Pointer] pointer returned by the model loader
    # @return [Proc] callable that frees +pointer+
    def self.finalize(pointer)
      # must use proc instead of stabby lambda
      proc { FFI.mitie_free(pointer) }
    end

    private

    def pointer
      @pointer
    end

    # Yields each entry of a NULL-terminated array of C strings together
    # with its index.
    def each_token(ptr)
      index = 0
      loop do
        token = (ptr + index * Fiddle::SIZEOF_VOIDP).ptr
        break if token.null?
        yield token, index
        index += 1
      end
    end

    # Tokenizes +text+ and returns [[token, character offset], ...] along
    # with the native token array, which the caller must release with
    # mitie_free.
    def tokens_with_offset_with_ptr(text)
      # Out-parameter cell for the native offsets array. RUBY_FREE lets the
      # garbage collector release the cell; without a free function it leaks.
      token_offsets = Fiddle::Pointer.malloc(Fiddle::SIZEOF_VOIDP, Fiddle::RUBY_FREE)
      ptr = FFI.mitie_tokenize_with_offsets(text, token_offsets)
      tokens = []
      previous_byte = 0
      char_offset = 0
      each_token(ptr) do |token, i|
        byte_offset = (token_offsets.ptr + i * Fiddle::SIZEOF_LONG).to_s(Fiddle::SIZEOF_LONG).unpack1("L!")
        # MITIE reports offsets into the C string, i.e. in bytes. Ruby string
        # indexing counts characters, so convert incrementally by measuring
        # the characters between consecutive token starts.
        char_offset += text.byteslice(previous_byte, byte_offset - previous_byte).length
        previous_byte = byte_offset
        tokens << [token.to_s.force_encoding(text.encoding), char_offset]
      end
      [tokens, ptr]
    ensure
      # use ptr, not token_offsets.ptr
      # (the cell is only known to hold a real address once the native call
      # has returned)
      FFI.mitie_free(token_offsets.ptr) if ptr
    end
  end
end
@@ -0,0 +1,3 @@
1
module Mitie
  # Version of the mitie gem; frozen so the constant cannot be mutated in place.
  VERSION = "0.1.0".freeze
end
metadata ADDED
@@ -0,0 +1,91 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: mitie
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - Andrew Kane
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2020-09-14 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: bundler
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ">="
18
+ - !ruby/object:Gem::Version
19
+ version: '0'
20
+ type: :development
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - ">="
25
+ - !ruby/object:Gem::Version
26
+ version: '0'
27
+ - !ruby/object:Gem::Dependency
28
+ name: rake
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - ">="
32
+ - !ruby/object:Gem::Version
33
+ version: '0'
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - ">="
39
+ - !ruby/object:Gem::Version
40
+ version: '0'
41
+ - !ruby/object:Gem::Dependency
42
+ name: minitest
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - ">="
46
+ - !ruby/object:Gem::Version
47
+ version: '5'
48
+ type: :development
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - ">="
53
+ - !ruby/object:Gem::Version
54
+ version: '5'
55
+ description:
56
+ email: andrew@chartkick.com
57
+ executables: []
58
+ extensions: []
59
+ extra_rdoc_files: []
60
+ files:
61
+ - CHANGELOG.md
62
+ - LICENSE.txt
63
+ - README.md
64
+ - lib/mitie.rb
65
+ - lib/mitie/ffi.rb
66
+ - lib/mitie/ner.rb
67
+ - lib/mitie/version.rb
68
+ homepage: https://github.com/ankane/mitie
69
+ licenses:
70
+ - BSL-1.0
71
+ metadata: {}
72
+ post_install_message:
73
+ rdoc_options: []
74
+ require_paths:
75
+ - lib
76
+ required_ruby_version: !ruby/object:Gem::Requirement
77
+ requirements:
78
+ - - ">="
79
+ - !ruby/object:Gem::Version
80
+ version: '2.5'
81
+ required_rubygems_version: !ruby/object:Gem::Requirement
82
+ requirements:
83
+ - - ">="
84
+ - !ruby/object:Gem::Version
85
+ version: '0'
86
+ requirements: []
87
+ rubygems_version: 3.1.2
88
+ signing_key:
89
+ specification_version: 4
90
+ summary: Named-entity recognition for Ruby
91
+ test_files: []