mitie 0.3.1 → 0.3.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +4 -0
- data/lib/mitie/binary_relation_detector.rb +4 -5
- data/lib/mitie/binary_relation_trainer.rb +1 -1
- data/lib/mitie/document.rb +6 -12
- data/lib/mitie/ner.rb +2 -2
- data/lib/mitie/ner_trainer.rb +1 -1
- data/lib/mitie/ner_training_instance.rb +1 -1
- data/lib/mitie/text_categorizer.rb +7 -8
- data/lib/mitie/text_categorizer_trainer.rb +2 -2
- data/lib/mitie/utils.rb +5 -3
- data/lib/mitie/version.rb +1 -1
- data/lib/mitie.rb +4 -6
- metadata +3 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: cf52799fb9a556f5a0fceda3b5c137991a6b49069171bd66f6f5eef1850ac775
|
4
|
+
data.tar.gz: ee25f2614c2322f97b23c46bfc112b102937b806ab977b418d8be9062c3c202d
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: e2d30ca8a73053b3f00f426011bd1cca4d08c003c56d191ecbab45a950597ca98eb717f6f3d6e5347976b4f747defaa7efab63210bd5e73455feb591f04e2703
|
7
|
+
data.tar.gz: 299c393aaab960b9da2fef2975326b02e2163b9d7a54de14648a532a0da80ea200bc026b6383bd57bc0a07b46489a7abc82f4f5e9c419f97c7322a1ec4b12fbd
|
data/CHANGELOG.md
CHANGED
data/lib/mitie/binary_relation_detector.rb
CHANGED
@@ -4,7 +4,7 @@ module Mitie
|
|
4
4
|
if path
|
5
5
|
# better error message
|
6
6
|
raise ArgumentError, "File does not exist" unless File.exist?(path)
|
7
|
-
@pointer = FFI.mitie_load_binary_relation_detector(path)
|
7
|
+
@pointer = FFI.mitie_load_binary_relation_detector(+path)
|
8
8
|
@pointer.free = FFI["mitie_free"]
|
9
9
|
elsif pointer
|
10
10
|
@pointer = pointer
|
@@ -36,7 +36,7 @@ module Mitie
|
|
36
36
|
end
|
37
37
|
|
38
38
|
def save_to_disk(filename)
|
39
|
-
if FFI.mitie_save_binary_relation_detector(filename, pointer) != 0
|
39
|
+
if FFI.mitie_save_binary_relation_detector(+filename, pointer) != 0
|
40
40
|
raise Error, "Unable to save detector"
|
41
41
|
end
|
42
42
|
nil
|
@@ -58,8 +58,9 @@ module Mitie
|
|
58
58
|
entity2[:token_index],
|
59
59
|
entity2[:token_length]
|
60
60
|
)
|
61
|
+
relation.free = FFI["mitie_free"]
|
61
62
|
|
62
|
-
score_ptr = Fiddle::Pointer.malloc(Fiddle::SIZEOF_DOUBLE)
|
63
|
+
score_ptr = Fiddle::Pointer.malloc(Fiddle::SIZEOF_DOUBLE, Fiddle::RUBY_FREE)
|
63
64
|
status = FFI.mitie_classify_binary_relation(pointer, relation, score_ptr)
|
64
65
|
raise Error, "Bad status: #{status}" if status != 0
|
65
66
|
|
@@ -71,8 +72,6 @@ module Mitie
|
|
71
72
|
score: score
|
72
73
|
}
|
73
74
|
end
|
74
|
-
ensure
|
75
|
-
FFI.mitie_free(relation) if relation
|
76
75
|
end
|
77
76
|
end
|
78
77
|
end
|
data/lib/mitie/document.rb
CHANGED
@@ -22,7 +22,7 @@ module Mitie
|
|
22
22
|
loop do
|
23
23
|
token = (tokens_ptr + i * Fiddle::SIZEOF_VOIDP).ptr
|
24
24
|
break if token.null?
|
25
|
-
offset = (offsets_ptr
|
25
|
+
offset = (offsets_ptr + i * Fiddle::SIZEOF_LONG).to_str(Fiddle::SIZEOF_LONG).unpack1("L!")
|
26
26
|
tokens << [token.to_s.force_encoding(text.encoding), offset]
|
27
27
|
i += 1
|
28
28
|
end
|
@@ -36,6 +36,7 @@ module Mitie
|
|
36
36
|
entities = []
|
37
37
|
tokens = tokens_with_offset
|
38
38
|
detections = FFI.mitie_extract_entities(pointer, tokens_ptr)
|
39
|
+
detections.free = FFI["mitie_free"]
|
39
40
|
num_detections = FFI.mitie_ner_get_num_detections(detections)
|
40
41
|
num_detections.times do |i|
|
41
42
|
pos = FFI.mitie_ner_get_detection_position(detections, i)
|
@@ -60,8 +61,6 @@ module Mitie
|
|
60
61
|
entities << entity
|
61
62
|
end
|
62
63
|
entities
|
63
|
-
ensure
|
64
|
-
FFI.mitie_free(detections) if detections
|
65
64
|
end
|
66
65
|
end
|
67
66
|
|
@@ -85,20 +84,15 @@ module Mitie
|
|
85
84
|
tokens_ptr = Utils.array_to_pointer(text)
|
86
85
|
[tokens_ptr, nil]
|
87
86
|
else
|
88
|
-
offsets_ptr = Fiddle::Pointer.malloc(Fiddle::SIZEOF_VOIDP)
|
89
|
-
tokens_ptr = FFI.mitie_tokenize_with_offsets(text, offsets_ptr)
|
87
|
+
offsets_ptr = Fiddle::Pointer.malloc(Fiddle::SIZEOF_VOIDP, Fiddle::RUBY_FREE)
|
88
|
+
tokens_ptr = FFI.mitie_tokenize_with_offsets(+text, offsets_ptr)
|
90
89
|
tokens_ptr.free = FFI["mitie_free"]
|
91
|
-
|
92
|
-
|
90
|
+
offsets_ptr = offsets_ptr.ptr
|
91
|
+
offsets_ptr.free = FFI["mitie_free"]
|
93
92
|
|
94
93
|
[tokens_ptr, offsets_ptr]
|
95
94
|
end
|
96
95
|
end
|
97
96
|
end
|
98
|
-
|
99
|
-
def self.finalize_ptr(pointer)
|
100
|
-
# must use proc instead of stabby lambda
|
101
|
-
proc { FFI.mitie_free(pointer.ptr) }
|
102
|
-
end
|
103
97
|
end
|
104
98
|
end
|
data/lib/mitie/ner.rb
CHANGED
@@ -6,7 +6,7 @@ module Mitie
|
|
6
6
|
if path
|
7
7
|
# better error message
|
8
8
|
raise ArgumentError, "File does not exist" unless File.exist?(path)
|
9
|
-
@pointer = FFI.mitie_load_named_entity_extractor(path)
|
9
|
+
@pointer = FFI.mitie_load_named_entity_extractor(+path)
|
10
10
|
@pointer.free = FFI["mitie_free"]
|
11
11
|
elsif pointer
|
12
12
|
@pointer = pointer
|
@@ -30,7 +30,7 @@ module Mitie
|
|
30
30
|
end
|
31
31
|
|
32
32
|
def save_to_disk(filename)
|
33
|
-
if FFI.mitie_save_named_entity_extractor(filename, pointer) != 0
|
33
|
+
if FFI.mitie_save_named_entity_extractor(+filename, pointer) != 0
|
34
34
|
raise Error, "Unable to save model"
|
35
35
|
end
|
36
36
|
nil
|
data/lib/mitie/ner_trainer.rb
CHANGED
@@ -2,7 +2,7 @@ module Mitie
|
|
2
2
|
class NERTrainer
|
3
3
|
def initialize(filename)
|
4
4
|
raise ArgumentError, "File does not exist" unless File.exist?(filename)
|
5
|
-
@pointer = FFI.mitie_create_ner_trainer(filename)
|
5
|
+
@pointer = FFI.mitie_create_ner_trainer(+filename)
|
6
6
|
@pointer.free = FFI["mitie_free"]
|
7
7
|
end
|
8
8
|
|
data/lib/mitie/ner_training_instance.rb
CHANGED
@@ -15,7 +15,7 @@ module Mitie
|
|
15
15
|
|
16
16
|
raise ArgumentError, "Range overlaps existing entity" if overlaps_any_entity?(range)
|
17
17
|
|
18
|
-
unless FFI.mitie_add_ner_training_entity(@pointer, range.begin, range.size, label).zero?
|
18
|
+
unless FFI.mitie_add_ner_training_entity(@pointer, range.begin, range.size, +label).zero?
|
19
19
|
raise Error, "Unable to add entity to training instance. Probably ran out of RAM."
|
20
20
|
end
|
21
21
|
|
data/lib/mitie/text_categorizer.rb
CHANGED
@@ -4,7 +4,7 @@ module Mitie
|
|
4
4
|
if path
|
5
5
|
# better error message
|
6
6
|
raise ArgumentError, "File does not exist" unless File.exist?(path)
|
7
|
-
@pointer = FFI.mitie_load_text_categorizer(path)
|
7
|
+
@pointer = FFI.mitie_load_text_categorizer(+path)
|
8
8
|
@pointer.free = FFI["mitie_free"]
|
9
9
|
elsif pointer
|
10
10
|
@pointer = pointer
|
@@ -16,24 +16,23 @@ module Mitie
|
|
16
16
|
def categorize(text)
|
17
17
|
tokens = text.is_a?(Array) ? text : Mitie.tokenize(text)
|
18
18
|
tokens_pointer = Utils.array_to_pointer(tokens)
|
19
|
-
text_tag = Fiddle::Pointer.malloc(Fiddle::SIZEOF_VOIDP)
|
20
|
-
text_score = Fiddle::Pointer.malloc(Fiddle::SIZEOF_DOUBLE)
|
19
|
+
text_tag = Fiddle::Pointer.malloc(Fiddle::SIZEOF_VOIDP, Fiddle::RUBY_FREE)
|
20
|
+
text_score = Fiddle::Pointer.malloc(Fiddle::SIZEOF_DOUBLE, Fiddle::RUBY_FREE)
|
21
21
|
|
22
22
|
if FFI.mitie_categorize_text(@pointer, tokens_pointer, text_tag, text_score) != 0
|
23
23
|
raise Error, "Unable to categorize"
|
24
24
|
end
|
25
|
+
text_tag = text_tag.ptr
|
26
|
+
text_tag.free = FFI["mitie_free"]
|
25
27
|
|
26
28
|
{
|
27
|
-
tag: text_tag.
|
29
|
+
tag: text_tag.to_s,
|
28
30
|
score: Utils.read_double(text_score)
|
29
31
|
}
|
30
|
-
ensure
|
31
|
-
# text_tag must be freed
|
32
|
-
FFI.mitie_free(text_tag.ptr) if text_tag
|
33
32
|
end
|
34
33
|
|
35
34
|
def save_to_disk(filename)
|
36
|
-
if FFI.mitie_save_text_categorizer(filename, @pointer) != 0
|
35
|
+
if FFI.mitie_save_text_categorizer(+filename, @pointer) != 0
|
37
36
|
raise Error, "Unable to save model"
|
38
37
|
end
|
39
38
|
nil
|
data/lib/mitie/text_categorizer_trainer.rb
CHANGED
@@ -2,14 +2,14 @@ module Mitie
|
|
2
2
|
class TextCategorizerTrainer
|
3
3
|
def initialize(filename)
|
4
4
|
raise ArgumentError, "File does not exist" unless File.exist?(filename)
|
5
|
-
@pointer = FFI.mitie_create_text_categorizer_trainer(filename)
|
5
|
+
@pointer = FFI.mitie_create_text_categorizer_trainer(+filename)
|
6
6
|
@pointer.free = FFI["mitie_free"]
|
7
7
|
end
|
8
8
|
|
9
9
|
def add(text, label)
|
10
10
|
tokens = text.is_a?(Array) ? text : Mitie.tokenize(text)
|
11
11
|
tokens_pointer = Utils.array_to_pointer(tokens)
|
12
|
-
FFI.mitie_add_text_categorizer_labeled_text(@pointer, tokens_pointer, label)
|
12
|
+
FFI.mitie_add_text_categorizer_labeled_text(@pointer, tokens_pointer, +label)
|
13
13
|
end
|
14
14
|
|
15
15
|
def beta
|
data/lib/mitie/utils.rb
CHANGED
@@ -2,10 +2,12 @@ module Mitie
|
|
2
2
|
module Utils
|
3
3
|
def self.array_to_pointer(text)
|
4
4
|
# malloc uses memset to set all bytes to 0
|
5
|
-
tokens_ptr = Fiddle::Pointer.malloc(Fiddle::SIZEOF_VOIDP * (text.size + 1))
|
5
|
+
tokens_ptr = Fiddle::Pointer.malloc(Fiddle::SIZEOF_VOIDP * (text.size + 1), Fiddle::RUBY_FREE)
|
6
|
+
text_ptrs = text.map { |v| Fiddle::Pointer[v] }
|
6
7
|
text.size.times do |i|
|
7
|
-
tokens_ptr[i * Fiddle::SIZEOF_VOIDP, Fiddle::SIZEOF_VOIDP] =
|
8
|
+
tokens_ptr[i * Fiddle::SIZEOF_VOIDP, Fiddle::SIZEOF_VOIDP] = text_ptrs[i].ref
|
8
9
|
end
|
10
|
+
tokens_ptr.instance_variable_set(:@mitie_refs, text_ptrs)
|
9
11
|
tokens_ptr
|
10
12
|
end
|
11
13
|
|
@@ -16,7 +18,7 @@ module Mitie
|
|
16
18
|
end
|
17
19
|
|
18
20
|
def self.read_double(ptr)
|
19
|
-
ptr.
|
21
|
+
ptr.to_str(Fiddle::SIZEOF_DOUBLE).unpack1("d")
|
20
22
|
end
|
21
23
|
end
|
22
24
|
end
|
data/lib/mitie/version.rb
CHANGED
data/lib/mitie.rb
CHANGED
@@ -39,20 +39,18 @@ module Mitie
|
|
39
39
|
|
40
40
|
class << self
|
41
41
|
def tokenize(text)
|
42
|
-
tokens_ptr = FFI.mitie_tokenize(text.to_s)
|
42
|
+
tokens_ptr = FFI.mitie_tokenize(+text.to_s)
|
43
|
+
tokens_ptr.free = FFI["mitie_free"]
|
43
44
|
tokens = read_tokens(tokens_ptr)
|
44
45
|
tokens.each { |t| t.force_encoding(text.encoding) }
|
45
46
|
tokens
|
46
|
-
ensure
|
47
|
-
FFI.mitie_free(tokens_ptr) if tokens_ptr
|
48
47
|
end
|
49
48
|
|
50
49
|
def tokenize_file(filename)
|
51
50
|
raise ArgumentError, "File does not exist" unless File.exist?(filename)
|
52
|
-
tokens_ptr = FFI.mitie_tokenize_file(filename)
|
51
|
+
tokens_ptr = FFI.mitie_tokenize_file(+filename)
|
52
|
+
tokens_ptr.free = FFI["mitie_free"]
|
53
53
|
read_tokens(tokens_ptr)
|
54
|
-
ensure
|
55
|
-
FFI.mitie_free(tokens_ptr) if tokens_ptr
|
56
54
|
end
|
57
55
|
|
58
56
|
private
|
metadata
CHANGED
@@ -1,13 +1,13 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: mitie
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.3.1
|
4
|
+
version: 0.3.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Andrew Kane
|
8
8
|
bindir: bin
|
9
9
|
cert_chain: []
|
10
|
-
date:
|
10
|
+
date: 1980-01-02 00:00:00.000000000 Z
|
11
11
|
dependencies:
|
12
12
|
- !ruby/object:Gem::Dependency
|
13
13
|
name: fiddle
|
@@ -66,7 +66,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
66
66
|
- !ruby/object:Gem::Version
|
67
67
|
version: '0'
|
68
68
|
requirements: []
|
69
|
-
rubygems_version: 3.6.
|
69
|
+
rubygems_version: 3.6.7
|
70
70
|
specification_version: 4
|
71
71
|
summary: Named-entity recognition for Ruby
|
72
72
|
test_files: []
|