mitie 0.1.2 → 0.1.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +4 -0
- data/README.md +8 -8
- data/lib/mitie/document.rb +43 -24
- data/lib/mitie/version.rb +1 -1
- metadata +7 -7
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: ca692cd09ac5c4541998bf601b13e95a041ac29e5d3fffcdf1e00f4b1a5fee58
|
|
4
|
+
data.tar.gz: dec98a81c3e45b2e6f191883e4a331bbfc0a98d9da7630c9ee806addb2284704
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: f802b2a582dc0362351f7c26441e482bc064affa03828e033a948c26a9b6a3bdb42ad216768510b507bd8be6dc0692a34a8b1a0125cfa9533e7710d5ee2c1cba
|
|
7
|
+
data.tar.gz: 0bf700445e710a16a871fff6d7089aa44a3a8a53420e7f9e5a7a8cf959ccf6066a5e7b59bd23a7631b2c378571e34f5cfa4d5158a6b599d91bc1caa3560b79c9
|
data/CHANGELOG.md
CHANGED
data/README.md
CHANGED
|
@@ -5,7 +5,7 @@
|
|
|
5
5
|
- Finds people, organizations, and locations in text
|
|
6
6
|
- Detects relationships between entities, like `PERSON` was born in `LOCATION`
|
|
7
7
|
|
|
8
|
-
[](https://github.com/ankane/mitie/actions)
|
|
9
9
|
|
|
10
10
|
## Installation
|
|
11
11
|
|
|
@@ -32,7 +32,7 @@ model = Mitie::NER.new("ner_model.dat")
|
|
|
32
32
|
Create a document
|
|
33
33
|
|
|
34
34
|
```ruby
|
|
35
|
-
doc = model.doc("Nat
|
|
35
|
+
doc = model.doc("Nat works at GitHub in San Francisco")
|
|
36
36
|
```
|
|
37
37
|
|
|
38
38
|
Get entities
|
|
@@ -45,9 +45,9 @@ This returns
|
|
|
45
45
|
|
|
46
46
|
```ruby
|
|
47
47
|
[
|
|
48
|
-
{text: "Nat
|
|
49
|
-
{text: "GitHub", tag: "ORGANIZATION", score: 0.
|
|
50
|
-
{text: "San Francisco", tag: "LOCATION", score: 1.
|
|
48
|
+
{text: "Nat", tag: "PERSON", score: 0.3112371212688382, offset: 0},
|
|
49
|
+
{text: "GitHub", tag: "ORGANIZATION", score: 0.5660115198329334, offset: 13},
|
|
50
|
+
{text: "San Francisco", tag: "LOCATION", score: 1.3890524313885309, offset: 23}
|
|
51
51
|
]
|
|
52
52
|
```
|
|
53
53
|
|
|
@@ -82,13 +82,13 @@ There are 21 detectors for English. You can find them in the `binary_relations`
|
|
|
82
82
|
Load a detector
|
|
83
83
|
|
|
84
84
|
```ruby
|
|
85
|
-
detector = Mitie::BinaryRelationDetector.new("
|
|
85
|
+
detector = Mitie::BinaryRelationDetector.new("rel_classifier_organization.organization.place_founded.svm")
|
|
86
86
|
```
|
|
87
87
|
|
|
88
88
|
And create a document
|
|
89
89
|
|
|
90
90
|
```ruby
|
|
91
|
-
doc = model.doc("
|
|
91
|
+
doc = model.doc("Shopify was founded in Ottawa")
|
|
92
92
|
```
|
|
93
93
|
|
|
94
94
|
Get relations
|
|
@@ -100,7 +100,7 @@ detector.relations(doc)
|
|
|
100
100
|
This returns
|
|
101
101
|
|
|
102
102
|
```ruby
|
|
103
|
-
[{first: "
|
|
103
|
+
[{first: "Shopify", second: "Ottawa", score: 0.17649169745814464}]
|
|
104
104
|
```
|
|
105
105
|
|
|
106
106
|
## History
|
data/lib/mitie/document.rb
CHANGED
|
@@ -4,7 +4,7 @@ module Mitie
|
|
|
4
4
|
|
|
5
5
|
def initialize(model, text)
|
|
6
6
|
@model = model
|
|
7
|
-
@text = text
|
|
7
|
+
@text = text
|
|
8
8
|
end
|
|
9
9
|
|
|
10
10
|
def tokens
|
|
@@ -13,16 +13,21 @@ module Mitie
|
|
|
13
13
|
|
|
14
14
|
def tokens_with_offset
|
|
15
15
|
@tokens_with_offset ||= begin
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
16
|
+
if text.is_a?(Array)
|
|
17
|
+
# offsets are unknown when given tokens
|
|
18
|
+
text.map { |v| [v, nil] }
|
|
19
|
+
else
|
|
20
|
+
i = 0
|
|
21
|
+
tokens = []
|
|
22
|
+
loop do
|
|
23
|
+
token = (tokens_ptr + i * Fiddle::SIZEOF_VOIDP).ptr
|
|
24
|
+
break if token.null?
|
|
25
|
+
offset = (offsets_ptr.ptr + i * Fiddle::SIZEOF_LONG).to_s(Fiddle::SIZEOF_LONG).unpack1("L!")
|
|
26
|
+
tokens << [token.to_s.force_encoding(text.encoding), offset]
|
|
27
|
+
i += 1
|
|
28
|
+
end
|
|
29
|
+
tokens
|
|
24
30
|
end
|
|
25
|
-
tokens
|
|
26
31
|
end
|
|
27
32
|
end
|
|
28
33
|
|
|
@@ -40,15 +45,20 @@ module Mitie
|
|
|
40
45
|
score = FFI.mitie_ner_get_detection_score(detections, i)
|
|
41
46
|
tok = tokens[pos, len]
|
|
42
47
|
offset = tok[0][1]
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
48
|
+
|
|
49
|
+
entity = {}
|
|
50
|
+
if offset
|
|
51
|
+
finish = tok[-1][1] + tok[-1][0].size
|
|
52
|
+
entity[:text] = text[offset...finish]
|
|
53
|
+
else
|
|
54
|
+
entity[:text] = tok.map(&:first)
|
|
55
|
+
end
|
|
56
|
+
entity[:tag] = tag
|
|
57
|
+
entity[:score] = score
|
|
58
|
+
entity[:offset] = offset if offset
|
|
59
|
+
entity[:token_index] = pos
|
|
60
|
+
entity[:token_length] = len
|
|
61
|
+
entities << entity
|
|
52
62
|
end
|
|
53
63
|
entities
|
|
54
64
|
ensure
|
|
@@ -73,13 +83,22 @@ module Mitie
|
|
|
73
83
|
|
|
74
84
|
def tokenize
|
|
75
85
|
@tokenize ||= begin
|
|
76
|
-
|
|
77
|
-
|
|
86
|
+
if text.is_a?(Array)
|
|
87
|
+
# malloc uses memset to set all bytes to 0
|
|
88
|
+
tokens_ptr = Fiddle::Pointer.malloc(Fiddle::SIZEOF_VOIDP * (text.size + 1))
|
|
89
|
+
text.size.times do |i|
|
|
90
|
+
tokens_ptr[i * Fiddle::SIZEOF_VOIDP, Fiddle::SIZEOF_VOIDP] = Fiddle::Pointer.to_ptr(text[i]).ref
|
|
91
|
+
end
|
|
92
|
+
[tokens_ptr, nil]
|
|
93
|
+
else
|
|
94
|
+
offsets_ptr = Fiddle::Pointer.malloc(Fiddle::SIZEOF_VOIDP)
|
|
95
|
+
tokens_ptr = FFI.mitie_tokenize_with_offsets(text, offsets_ptr)
|
|
78
96
|
|
|
79
|
-
|
|
80
|
-
|
|
97
|
+
ObjectSpace.define_finalizer(tokens_ptr, self.class.finalize(tokens_ptr))
|
|
98
|
+
ObjectSpace.define_finalizer(offsets_ptr, self.class.finalize_ptr(offsets_ptr))
|
|
81
99
|
|
|
82
|
-
|
|
100
|
+
[tokens_ptr, offsets_ptr]
|
|
101
|
+
end
|
|
83
102
|
end
|
|
84
103
|
end
|
|
85
104
|
|
data/lib/mitie/version.rb
CHANGED
metadata
CHANGED
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: mitie
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.1.
|
|
4
|
+
version: 0.1.3
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Andrew Kane
|
|
8
|
-
autorequire:
|
|
8
|
+
autorequire:
|
|
9
9
|
bindir: bin
|
|
10
10
|
cert_chain: []
|
|
11
|
-
date: 2020-
|
|
11
|
+
date: 2020-12-05 00:00:00.000000000 Z
|
|
12
12
|
dependencies:
|
|
13
13
|
- !ruby/object:Gem::Dependency
|
|
14
14
|
name: bundler
|
|
@@ -52,7 +52,7 @@ dependencies:
|
|
|
52
52
|
- - ">="
|
|
53
53
|
- !ruby/object:Gem::Version
|
|
54
54
|
version: '5'
|
|
55
|
-
description:
|
|
55
|
+
description:
|
|
56
56
|
email: andrew@chartkick.com
|
|
57
57
|
executables: []
|
|
58
58
|
extensions: []
|
|
@@ -75,7 +75,7 @@ homepage: https://github.com/ankane/mitie
|
|
|
75
75
|
licenses:
|
|
76
76
|
- BSL-1.0
|
|
77
77
|
metadata: {}
|
|
78
|
-
post_install_message:
|
|
78
|
+
post_install_message:
|
|
79
79
|
rdoc_options: []
|
|
80
80
|
require_paths:
|
|
81
81
|
- lib
|
|
@@ -90,8 +90,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
|
90
90
|
- !ruby/object:Gem::Version
|
|
91
91
|
version: '0'
|
|
92
92
|
requirements: []
|
|
93
|
-
rubygems_version: 3.1.
|
|
94
|
-
signing_key:
|
|
93
|
+
rubygems_version: 3.1.4
|
|
94
|
+
signing_key:
|
|
95
95
|
specification_version: 4
|
|
96
96
|
summary: Named-entity recognition for Ruby
|
|
97
97
|
test_files: []
|