mitie 0.1.2 → 0.1.3
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +4 -0
- data/README.md +8 -8
- data/lib/mitie/document.rb +43 -24
- data/lib/mitie/version.rb +1 -1
- metadata +7 -7
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: ca692cd09ac5c4541998bf601b13e95a041ac29e5d3fffcdf1e00f4b1a5fee58
|
4
|
+
data.tar.gz: dec98a81c3e45b2e6f191883e4a331bbfc0a98d9da7630c9ee806addb2284704
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: f802b2a582dc0362351f7c26441e482bc064affa03828e033a948c26a9b6a3bdb42ad216768510b507bd8be6dc0692a34a8b1a0125cfa9533e7710d5ee2c1cba
|
7
|
+
data.tar.gz: 0bf700445e710a16a871fff6d7089aa44a3a8a53420e7f9e5a7a8cf959ccf6066a5e7b59bd23a7631b2c378571e34f5cfa4d5158a6b599d91bc1caa3560b79c9
|
data/CHANGELOG.md
CHANGED
data/README.md
CHANGED
@@ -5,7 +5,7 @@
|
|
5
5
|
- Finds people, organizations, and locations in text
|
6
6
|
- Detects relationships between entities, like `PERSON` was born in `LOCATION`
|
7
7
|
|
8
|
-
[![Build Status](https://
|
8
|
+
[![Build Status](https://github.com/ankane/mitie/workflows/build/badge.svg?branch=master)](https://github.com/ankane/mitie/actions)
|
9
9
|
|
10
10
|
## Installation
|
11
11
|
|
@@ -32,7 +32,7 @@ model = Mitie::NER.new("ner_model.dat")
|
|
32
32
|
Create a document
|
33
33
|
|
34
34
|
```ruby
|
35
|
-
doc = model.doc("Nat
|
35
|
+
doc = model.doc("Nat works at GitHub in San Francisco")
|
36
36
|
```
|
37
37
|
|
38
38
|
Get entities
|
@@ -45,9 +45,9 @@ This returns
|
|
45
45
|
|
46
46
|
```ruby
|
47
47
|
[
|
48
|
-
{text: "Nat
|
49
|
-
{text: "GitHub", tag: "ORGANIZATION", score: 0.
|
50
|
-
{text: "San Francisco", tag: "LOCATION", score: 1.
|
48
|
+
{text: "Nat", tag: "PERSON", score: 0.3112371212688382, offset: 0},
|
49
|
+
{text: "GitHub", tag: "ORGANIZATION", score: 0.5660115198329334, offset: 13},
|
50
|
+
{text: "San Francisco", tag: "LOCATION", score: 1.3890524313885309, offset: 23}
|
51
51
|
]
|
52
52
|
```
|
53
53
|
|
@@ -82,13 +82,13 @@ There are 21 detectors for English. You can find them in the `binary_relations`
|
|
82
82
|
Load a detector
|
83
83
|
|
84
84
|
```ruby
|
85
|
-
detector = Mitie::BinaryRelationDetector.new("
|
85
|
+
detector = Mitie::BinaryRelationDetector.new("rel_classifier_organization.organization.place_founded.svm")
|
86
86
|
```
|
87
87
|
|
88
88
|
And create a document
|
89
89
|
|
90
90
|
```ruby
|
91
|
-
doc = model.doc("
|
91
|
+
doc = model.doc("Shopify was founded in Ottawa")
|
92
92
|
```
|
93
93
|
|
94
94
|
Get relations
|
@@ -100,7 +100,7 @@ detector.relations(doc)
|
|
100
100
|
This returns
|
101
101
|
|
102
102
|
```ruby
|
103
|
-
[{first: "
|
103
|
+
[{first: "Shopify", second: "Ottawa", score: 0.17649169745814464}]
|
104
104
|
```
|
105
105
|
|
106
106
|
## History
|
data/lib/mitie/document.rb
CHANGED
@@ -4,7 +4,7 @@ module Mitie
|
|
4
4
|
|
5
5
|
def initialize(model, text)
|
6
6
|
@model = model
|
7
|
-
@text = text
|
7
|
+
@text = text
|
8
8
|
end
|
9
9
|
|
10
10
|
def tokens
|
@@ -13,16 +13,21 @@ module Mitie
|
|
13
13
|
|
14
14
|
def tokens_with_offset
|
15
15
|
@tokens_with_offset ||= begin
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
16
|
+
if text.is_a?(Array)
|
17
|
+
# offsets are unknown when given tokens
|
18
|
+
text.map { |v| [v, nil] }
|
19
|
+
else
|
20
|
+
i = 0
|
21
|
+
tokens = []
|
22
|
+
loop do
|
23
|
+
token = (tokens_ptr + i * Fiddle::SIZEOF_VOIDP).ptr
|
24
|
+
break if token.null?
|
25
|
+
offset = (offsets_ptr.ptr + i * Fiddle::SIZEOF_LONG).to_s(Fiddle::SIZEOF_LONG).unpack1("L!")
|
26
|
+
tokens << [token.to_s.force_encoding(text.encoding), offset]
|
27
|
+
i += 1
|
28
|
+
end
|
29
|
+
tokens
|
24
30
|
end
|
25
|
-
tokens
|
26
31
|
end
|
27
32
|
end
|
28
33
|
|
@@ -40,15 +45,20 @@ module Mitie
|
|
40
45
|
score = FFI.mitie_ner_get_detection_score(detections, i)
|
41
46
|
tok = tokens[pos, len]
|
42
47
|
offset = tok[0][1]
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
48
|
+
|
49
|
+
entity = {}
|
50
|
+
if offset
|
51
|
+
finish = tok[-1][1] + tok[-1][0].size
|
52
|
+
entity[:text] = text[offset...finish]
|
53
|
+
else
|
54
|
+
entity[:text] = tok.map(&:first)
|
55
|
+
end
|
56
|
+
entity[:tag] = tag
|
57
|
+
entity[:score] = score
|
58
|
+
entity[:offset] = offset if offset
|
59
|
+
entity[:token_index] = pos
|
60
|
+
entity[:token_length] = len
|
61
|
+
entities << entity
|
52
62
|
end
|
53
63
|
entities
|
54
64
|
ensure
|
@@ -73,13 +83,22 @@ module Mitie
|
|
73
83
|
|
74
84
|
def tokenize
|
75
85
|
@tokenize ||= begin
|
76
|
-
|
77
|
-
|
86
|
+
if text.is_a?(Array)
|
87
|
+
# malloc uses memset to set all bytes to 0
|
88
|
+
tokens_ptr = Fiddle::Pointer.malloc(Fiddle::SIZEOF_VOIDP * (text.size + 1))
|
89
|
+
text.size.times do |i|
|
90
|
+
tokens_ptr[i * Fiddle::SIZEOF_VOIDP, Fiddle::SIZEOF_VOIDP] = Fiddle::Pointer.to_ptr(text[i]).ref
|
91
|
+
end
|
92
|
+
[tokens_ptr, nil]
|
93
|
+
else
|
94
|
+
offsets_ptr = Fiddle::Pointer.malloc(Fiddle::SIZEOF_VOIDP)
|
95
|
+
tokens_ptr = FFI.mitie_tokenize_with_offsets(text, offsets_ptr)
|
78
96
|
|
79
|
-
|
80
|
-
|
97
|
+
ObjectSpace.define_finalizer(tokens_ptr, self.class.finalize(tokens_ptr))
|
98
|
+
ObjectSpace.define_finalizer(offsets_ptr, self.class.finalize_ptr(offsets_ptr))
|
81
99
|
|
82
|
-
|
100
|
+
[tokens_ptr, offsets_ptr]
|
101
|
+
end
|
83
102
|
end
|
84
103
|
end
|
85
104
|
|
data/lib/mitie/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: mitie
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.
|
4
|
+
version: 0.1.3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Andrew Kane
|
8
|
-
autorequire:
|
8
|
+
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2020-
|
11
|
+
date: 2020-12-05 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -52,7 +52,7 @@ dependencies:
|
|
52
52
|
- - ">="
|
53
53
|
- !ruby/object:Gem::Version
|
54
54
|
version: '5'
|
55
|
-
description:
|
55
|
+
description:
|
56
56
|
email: andrew@chartkick.com
|
57
57
|
executables: []
|
58
58
|
extensions: []
|
@@ -75,7 +75,7 @@ homepage: https://github.com/ankane/mitie
|
|
75
75
|
licenses:
|
76
76
|
- BSL-1.0
|
77
77
|
metadata: {}
|
78
|
-
post_install_message:
|
78
|
+
post_install_message:
|
79
79
|
rdoc_options: []
|
80
80
|
require_paths:
|
81
81
|
- lib
|
@@ -90,8 +90,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
90
90
|
- !ruby/object:Gem::Version
|
91
91
|
version: '0'
|
92
92
|
requirements: []
|
93
|
-
rubygems_version: 3.1.
|
94
|
-
signing_key:
|
93
|
+
rubygems_version: 3.1.4
|
94
|
+
signing_key:
|
95
95
|
specification_version: 4
|
96
96
|
summary: Named-entity recognition for Ruby
|
97
97
|
test_files: []
|