mitier 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: f2068109131d7766f5798f6a8e8f50998aa5e712
4
+ data.tar.gz: 72459c0c285b85588347c8eb37c7f22d7e280810
5
+ SHA512:
6
+ metadata.gz: a71ede38f13145faef3e84615a08059b01e328464a071ce450e3308170873ef0c016fe2d7e77a526fe3ff2f050c5bd9854ee45f0b4254630835ca3da7a493ec1
7
+ data.tar.gz: 0cfb3453e973f5f6f3dad77de1068992dcc771d03a89607d57442ed7a4f180bd58b0b5c17f4a1442e9bd3c9ca558e8660787c025ad03ea4f3570438b513a62eb
@@ -0,0 +1 @@
1
+ TEST_MODEL_PATH=./MITIE-models/english/ner_model.dat
@@ -0,0 +1,11 @@
1
+ /.bundle/
2
+ /.yardoc
3
+ /Gemfile.lock
4
+ /_yardoc/
5
+ /coverage/
6
+ /doc/
7
+ /pkg/
8
+ /spec/reports/
9
+ /tmp/
10
+ *.gem
11
+ .env
data/.rspec ADDED
@@ -0,0 +1,2 @@
1
+ --format documentation
2
+ --color
@@ -0,0 +1,2 @@
1
+ Documentation:
2
+ Enabled: false
@@ -0,0 +1,4 @@
1
+ language: ruby
2
+ rvm:
3
+ - 2.3.0
4
+ before_install: gem install bundler -v 1.11.2
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source 'https://rubygems.org'
2
+
3
+ # Specify your gem's dependencies in mitier.gemspec
4
+ gemspec
@@ -0,0 +1,21 @@
1
+ The MIT License (MIT)
2
+
3
+ Copyright (c) 2016 Marko Satek
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in
13
+ all copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21
+ THE SOFTWARE.
@@ -0,0 +1,60 @@
1
+ # Mitier
2
+
3
+ Simple FFI wrapper for MIT's MITIE library. Currently it only wraps the named entity
4
+ extraction part.
5
+
6
+ For details on what MITIE is and does, visit [its GitHub page](https://github.com/mit-nlp/MITIE).
7
+
8
+ ## Installation
9
+
10
+ Add this line to your application's Gemfile:
11
+
12
+ ```ruby
13
+ gem 'mitier'
14
+ ```
15
+
16
+ And then execute:
17
+
18
+ $ bundle
19
+
20
+ Or install it yourself as:
21
+
22
+ $ gem install mitier
23
+
24
+ ## Usage
25
+
26
+ To use the gem, make sure you have MITIE compiled and the libmitie shared library placed where it can be found by the system.
27
+ Also, you are going to need trained models. Download links can be found on the [MITIE GitHub page](https://github.com/mit-nlp/MITIE).
28
+
29
+ To run named entity recognition:
30
+
31
+ ```ruby
32
+ extractor = Mitier::Extractor.new(TRAINED_MODEL_PATH).load
33
+ extractor.process_ner SOME_TEXT
34
+ ```
35
+
36
+ If you only want to run the text tokenizer:
37
+
38
+ ```ruby
39
+ tokenizer = Mitier::Tokenizer.new SOME_TEXT
40
+ tokenizer.process
41
+ ```
42
+
43
+ ## Development
44
+
45
+ To run the specs, set the `TEST_MODEL_PATH` environment variable and then run `bundle exec rspec`. Environment variables are loaded with Dotenv, so you need a `.env` file that defines that variable. There is a `.env.example` in the repo.
46
+
47
+ You can also run `bin/console` for an interactive prompt that will allow you to experiment.
48
+
49
+ To install this gem onto your local machine, run `bundle exec rake install`.
50
+
51
+
52
+ ## Contributing
53
+
54
+ Bug reports and pull requests are welcome on GitHub at https://github.com/satek/mitier.
55
+
56
+
57
+ ## License
58
+
59
+ The gem is available as open source under the terms of the [MIT License](http://opensource.org/licenses/MIT).
60
+
@@ -0,0 +1,6 @@
1
+ require 'bundler/gem_tasks'
2
+ require 'rspec/core/rake_task'
3
+
4
+ RSpec::Core::RakeTask.new(:spec)
5
+
6
+ task default: :spec
@@ -0,0 +1,9 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'bundler/setup'
4
+ require 'mitier'
5
+ require 'dotenv'
6
+ require 'pry'
7
+
8
+ Dotenv.load
9
+ Pry.start
@@ -0,0 +1,8 @@
1
+ #!/usr/bin/env bash
2
+ set -euo pipefail
3
+ IFS=$'\n\t'
4
+ set -vx
5
+
6
+ bundle install
7
+
8
+ # Do any other automated setup that you need to do here
@@ -0,0 +1,9 @@
1
+ require 'mitier/version'
2
+ require 'ffi'
3
+ require 'mitier/wrapper'
4
+ require 'mitier/extractor'
5
+ require 'mitier/tokenizer'
6
+ require 'mitier/ner'
7
+
8
+ module Mitier
9
+ end
@@ -0,0 +1,21 @@
1
module Mitier
  # Holds a handle to a MITIE named entity extractor model and runs named
  # entity recognition with it.
  #
  # Usage:
  #   extractor = Mitier::Extractor.new(model_path).load
  #   extractor.process_ner(text)
  class Extractor
    attr_accessor :path

    # Raised by #process_ner when no usable model handle is available.
    # Inherits from StandardError (not Exception) so that a plain `rescue`
    # catches it like any other application error.
    class ModelNotLoaded < StandardError; end

    # @param path [String] location of the trained model file
    def initialize(path)
      @path = path
    end

    # Asks the native library to load the model found at #path and keeps the
    # returned handle for later calls to #process_ner.
    #
    # @return [Mitier::Extractor] self, so the call can be chained
    def load
      @extractor = Mitier::Wrapper.mitie_load_named_entity_extractor(@path)
      self
    end

    # Runs named entity recognition over the given text.
    #
    # @param text [String] text to analyse
    # @return [Mitier::Ner] the processed result
    # @raise [ModelNotLoaded] if #load was not called or the native loader
    #   handed back a null pointer
    def process_ner(text)
      raise ModelNotLoaded, "model not loaded: #{@path}" unless loaded?

      Ner.new(@extractor, text).process
    end

    private

    # A null pointer object is truthy in Ruby, so checking for nil alone would
    # let a failed load slip through and pass a null handle to native code.
    def loaded?
      return false if @extractor.nil?

      !(@extractor.respond_to?(:null?) && @extractor.null?)
    end
  end
end
@@ -0,0 +1,51 @@
1
module Mitier
  # Tokenizes a text and runs MITIE named entity extraction over the tokens.
  #
  # After #process, #tokens holds the token strings and #detections holds one
  # Hash per detected entity with the keys :tokens, :tagstr, :tag and :score.
  class Ner < Tokenizer
    attr_accessor :detections

    # @param extractor [Object] native extractor handle passed through to
    #   Wrapper.mitie_extract_entities
    # @param text [String] text to analyse
    def initialize(extractor, text)
      super(text)
      @extractor = extractor
    end

    # Tokenizes the text, extracts entities and fills #tokens / #detections.
    # Empty text short-circuits without calling into the native library.
    #
    # @return [Mitier::Ner] self
    def process
      if text.empty?
        # Two distinct arrays: sharing one object would make a mutation of
        # tokens show up in detections and vice versa.
        @tokens = []
        @detections = []
        return self
      end

      tokens_ptr = tokenize
      detections_ptr = detect(tokens_ptr)
      process_token_elements(tokens_ptr)
      process_detections(detections_ptr)
      self
    end

    private

    attr_accessor :extractor

    # Hands the token array to the native extractor and returns the
    # detections handle.
    def detect(tokens_ptr)
      Wrapper.mitie_extract_entities(extractor, tokens_ptr)
    end

    # Builds the #detections list from the native detections handle.
    def process_detections(ptr)
      count = Wrapper.mitie_ner_get_num_detections(ptr)
      @detections = Array.new(count) { |index| detection_attrs(ptr, index) }
    end

    # Collects the attributes of the detection at index +nr+.
    def detection_attrs(ptr, nr)
      {
        tokens: detection_tokens(ptr, nr),
        tagstr: Wrapper.mitie_ner_get_detection_tagstr(ptr, nr),
        tag: Wrapper.mitie_ner_get_detection_tag(ptr, nr),
        score: Wrapper.mitie_ner_get_detection_score(ptr, nr)
      }
    end

    # Returns the token strings covered by the detection at index +nr+,
    # using its start position and length within #tokens.
    def detection_tokens(ptr, nr)
      start = Wrapper.mitie_ner_get_detection_position(ptr, nr)
      length = Wrapper.mitie_ner_get_detection_length(ptr, nr)
      (start...(start + length)).map { |index| tokens[index] }
    end
  end
end
@@ -0,0 +1,30 @@
1
module Mitier
  # Splits a text into tokens using the native MITIE tokenizer.
  #
  # The text is converted with #to_s and stripped on construction. After
  # #process, #tokens holds the resulting token strings.
  class Tokenizer
    attr_accessor :tokens, :text

    # @param text [#to_s] text to tokenize; nil becomes the empty string
    def initialize(text)
      @text = text.to_s.strip
    end

    # Tokenizes #text and stores the result in #tokens.
    # Empty text yields an empty list without calling the native library.
    #
    # @return [Array<String>] the tokens
    def process
      # Assign as well as return, so #tokens is [] rather than nil afterwards.
      return @tokens = [] if text.empty?

      process_token_elements(tokenize)
    end

    private

    # Calls the native tokenizer and returns its result pointer.
    def tokenize
      Wrapper.mitie_tokenize(text)
    end

    # Walks the array of string pointers starting at +ptr+ until a null entry
    # is reached and collects the strings into #tokens.
    def process_token_elements(ptr)
      @tokens = []
      until (element = ptr.read_pointer).null?
        # Strings read from native memory come back binary-encoded; tag them
        # with the input's encoding so non-ASCII tokens compare equal to
        # slices of the original text.
        @tokens << element.read_string.force_encoding(text.encoding)
        ptr += FFI::Type::POINTER.size
      end
      @tokens
    end
  end
end
@@ -0,0 +1,3 @@
1
+ module Mitier
2
+ VERSION = '0.1.0'.freeze
3
+ end
@@ -0,0 +1,17 @@
1
module Mitier
  # FFI bindings for the subset of the MITIE C API used by this gem
  # (tokenization and named entity extraction). The shared library is looked
  # up under the name 'mitie'.
  module Wrapper
    extend FFI::Library
    ffi_lib 'mitie'

    # Tokenization
    attach_function :mitie_tokenize_file, [:string], :pointer
    attach_function :mitie_tokenize, [:string], :pointer

    # Model loading and entity extraction
    attach_function :mitie_load_named_entity_extractor, [:string], :pointer
    attach_function :mitie_extract_entities, [:pointer, :pointer], :pointer

    # Accessors on a detections handle
    attach_function :mitie_ner_get_num_detections, [:pointer], :ulong
    attach_function :mitie_ner_get_detection_tagstr, [:pointer, :ulong], :string
    attach_function :mitie_ner_get_detection_tag, [:pointer, :ulong], :ulong
    # The C function returns a double; binding it as :float would read the
    # return value with the wrong width and produce incorrect scores.
    attach_function :mitie_ner_get_detection_score, [:pointer, :ulong], :double
    attach_function :mitie_ner_get_detection_length, [:pointer, :ulong], :ulong
    attach_function :mitie_ner_get_detection_position,
                    [:pointer, :ulong], :ulong

    # NOTE(review): no deallocation function is bound here, so pointers
    # returned by the functions above are never released by this gem —
    # confirm against the MITIE header whether callers are expected to free
    # them.
  end
end
@@ -0,0 +1,29 @@
1
+ # coding: utf-8
2
+ lib = File.expand_path('../lib', __FILE__)
3
+ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
4
+ require 'mitier/version'
5
+
6
+ Gem::Specification.new do |spec|
7
+ spec.name = 'mitier'
8
+ spec.version = Mitier::VERSION
9
+ spec.authors = ['Marko Satek']
10
+ spec.email = ['satekm@gmail.com']
11
+
12
+ spec.summary = 'MITIE library wrapped in Ruby with FFI. See https://github.com/mit-nlp/MITIE'
13
+ spec.homepage = 'http://github.com/satek/mitier'
14
+ spec.license = 'MIT'
15
+
16
+ spec.files = `git ls-files -z`
17
+ .split("\x0")
18
+ .reject { |f| f.match(%r{^(test|spec|features)/}) }
19
+ spec.require_paths = ['lib']
20
+
21
+ spec.add_dependency 'ffi', '~> 1.9.10'
22
+
23
+ spec.add_development_dependency 'bundler', '~> 1.11'
24
+ spec.add_development_dependency 'rake', '~> 10.0'
25
+ spec.add_development_dependency 'rspec', '~> 3.0'
26
+ spec.add_development_dependency 'pry', '~> 0.10.3'
27
+ spec.add_development_dependency 'dotenv', '~> 2.1.1'
28
+ spec.add_development_dependency 'rubocop', '~> 0.39.0'
29
+ end
metadata ADDED
@@ -0,0 +1,160 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: mitier
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - Marko Satek
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2016-05-01 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: ffi
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: 1.9.10
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "~>"
25
+ - !ruby/object:Gem::Version
26
+ version: 1.9.10
27
+ - !ruby/object:Gem::Dependency
28
+ name: bundler
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - "~>"
32
+ - !ruby/object:Gem::Version
33
+ version: '1.11'
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - "~>"
39
+ - !ruby/object:Gem::Version
40
+ version: '1.11'
41
+ - !ruby/object:Gem::Dependency
42
+ name: rake
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - "~>"
46
+ - !ruby/object:Gem::Version
47
+ version: '10.0'
48
+ type: :development
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - "~>"
53
+ - !ruby/object:Gem::Version
54
+ version: '10.0'
55
+ - !ruby/object:Gem::Dependency
56
+ name: rspec
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - "~>"
60
+ - !ruby/object:Gem::Version
61
+ version: '3.0'
62
+ type: :development
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - "~>"
67
+ - !ruby/object:Gem::Version
68
+ version: '3.0'
69
+ - !ruby/object:Gem::Dependency
70
+ name: pry
71
+ requirement: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - "~>"
74
+ - !ruby/object:Gem::Version
75
+ version: 0.10.3
76
+ type: :development
77
+ prerelease: false
78
+ version_requirements: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - "~>"
81
+ - !ruby/object:Gem::Version
82
+ version: 0.10.3
83
+ - !ruby/object:Gem::Dependency
84
+ name: dotenv
85
+ requirement: !ruby/object:Gem::Requirement
86
+ requirements:
87
+ - - "~>"
88
+ - !ruby/object:Gem::Version
89
+ version: 2.1.1
90
+ type: :development
91
+ prerelease: false
92
+ version_requirements: !ruby/object:Gem::Requirement
93
+ requirements:
94
+ - - "~>"
95
+ - !ruby/object:Gem::Version
96
+ version: 2.1.1
97
+ - !ruby/object:Gem::Dependency
98
+ name: rubocop
99
+ requirement: !ruby/object:Gem::Requirement
100
+ requirements:
101
+ - - "~>"
102
+ - !ruby/object:Gem::Version
103
+ version: 0.39.0
104
+ type: :development
105
+ prerelease: false
106
+ version_requirements: !ruby/object:Gem::Requirement
107
+ requirements:
108
+ - - "~>"
109
+ - !ruby/object:Gem::Version
110
+ version: 0.39.0
111
+ description:
112
+ email:
113
+ - satekm@gmail.com
114
+ executables: []
115
+ extensions: []
116
+ extra_rdoc_files: []
117
+ files:
118
+ - ".env.example"
119
+ - ".gitignore"
120
+ - ".rspec"
121
+ - ".rubocop.yml"
122
+ - ".travis.yml"
123
+ - Gemfile
124
+ - LICENSE.txt
125
+ - README.md
126
+ - Rakefile
127
+ - bin/console
128
+ - bin/setup
129
+ - lib/mitier.rb
130
+ - lib/mitier/extractor.rb
131
+ - lib/mitier/ner.rb
132
+ - lib/mitier/tokenizer.rb
133
+ - lib/mitier/version.rb
134
+ - lib/mitier/wrapper.rb
135
+ - mitier.gemspec
136
+ homepage: http://github.com/satek/mitier
137
+ licenses:
138
+ - MIT
139
+ metadata: {}
140
+ post_install_message:
141
+ rdoc_options: []
142
+ require_paths:
143
+ - lib
144
+ required_ruby_version: !ruby/object:Gem::Requirement
145
+ requirements:
146
+ - - ">="
147
+ - !ruby/object:Gem::Version
148
+ version: '0'
149
+ required_rubygems_version: !ruby/object:Gem::Requirement
150
+ requirements:
151
+ - - ">="
152
+ - !ruby/object:Gem::Version
153
+ version: '0'
154
+ requirements: []
155
+ rubyforge_project:
156
+ rubygems_version: 2.5.1
157
+ signing_key:
158
+ specification_version: 4
159
+ summary: MITIE library wrapped in Ruby with FFI. See https://github.com/mit-nlp/MITIE
160
+ test_files: []