mitie 0.2.0 → 0.2.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +5 -0
- data/README.md +2 -2
- data/lib/mitie/ffi.rb +1 -0
- data/lib/mitie/text_categorizer.rb +2 -1
- data/lib/mitie/text_categorizer_trainer.rb +2 -1
- data/lib/mitie/version.rb +1 -1
- data/lib/mitie.rb +32 -0
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: dc2b7bdba2fba6b335ab9750efab8766190e618c4b8e2542ff6409f6727ec8b2
|
4
|
+
data.tar.gz: 03d85b928082a04b46209694c8e1b294e5c4205057a26b5cc65e695bbe3564a9
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 4092a2dc005bb76527429454c301179c63f0eeee4913a3a8f56190e13d7f7551ee3bbfcd1098a6c335cf3b886ec0dee4e52f5c062975a4a058d117229f45b340
|
7
|
+
data.tar.gz: 066e5800a520b16002388088fc367939a26738bd638631d3b86ff9074d508cf5cd051154fbbf7be129f07f82c97a851caf7a6486e6fff2fb7f6a4b252d426ec0
|
data/CHANGELOG.md
CHANGED
data/README.md
CHANGED
@@ -183,7 +183,7 @@ trainer = Mitie::TextCategorizerTrainer.new("total_word_feature_extractor.dat")
|
|
183
183
|
Add labeled text to the trainer
|
184
184
|
|
185
185
|
```ruby
|
186
|
-
trainer.add(
|
186
|
+
trainer.add("This is super cool", "positive")
|
187
187
|
```
|
188
188
|
|
189
189
|
Train the model
|
@@ -207,7 +207,7 @@ model = Mitie::TextCategorizer.new("text_categorization_model.dat")
|
|
207
207
|
Categorize text
|
208
208
|
|
209
209
|
```ruby
|
210
|
-
model.categorize(
|
210
|
+
model.categorize("What a super nice day")
|
211
211
|
```
|
212
212
|
|
213
213
|
## Deployment
|
data/lib/mitie/ffi.rb
CHANGED
@@ -14,6 +14,7 @@ module Mitie
|
|
14
14
|
|
15
15
|
extern "void mitie_free(void* object)"
|
16
16
|
extern "char** mitie_tokenize(const char* text)"
|
17
|
+
extern "char** mitie_tokenize_file(const char* filename)"
|
17
18
|
extern "char** mitie_tokenize_with_offsets(const char* text, unsigned long** token_offsets)"
|
18
19
|
|
19
20
|
# ner
|
data/lib/mitie/text_categorizer.rb
CHANGED
@@ -14,7 +14,8 @@ module Mitie
|
|
14
14
|
ObjectSpace.define_finalizer(self, self.class.finalize(@pointer))
|
15
15
|
end
|
16
16
|
|
17
|
-
def categorize(tokens)
|
17
|
+
def categorize(text)
|
18
|
+
tokens = text.is_a?(Array) ? text : Mitie.tokenize(text)
|
18
19
|
tokens_pointer = Utils.array_to_pointer(tokens)
|
19
20
|
text_tag = Fiddle::Pointer.malloc(Fiddle::SIZEOF_VOIDP)
|
20
21
|
text_score = Fiddle::Pointer.malloc(Fiddle::SIZEOF_DOUBLE)
|
data/lib/mitie/text_categorizer_trainer.rb
CHANGED
@@ -7,7 +7,8 @@ module Mitie
|
|
7
7
|
ObjectSpace.define_finalizer(self, self.class.finalize(@pointer))
|
8
8
|
end
|
9
9
|
|
10
|
-
def add(tokens, label)
|
10
|
+
def add(text, label)
|
11
|
+
tokens = text.is_a?(Array) ? text : Mitie.tokenize(text)
|
11
12
|
tokens_pointer = Utils.array_to_pointer(tokens)
|
12
13
|
FFI.mitie_add_text_categorizer_labeled_text(@pointer, tokens_pointer, label)
|
13
14
|
end
|
data/lib/mitie/version.rb
CHANGED
data/lib/mitie.rb
CHANGED
@@ -36,4 +36,36 @@ module Mitie
|
|
36
36
|
|
37
37
|
# friendlier error message
|
38
38
|
autoload :FFI, "mitie/ffi"
|
39
|
+
|
40
|
+
class << self
|
41
|
+
def tokenize(text)
|
42
|
+
tokens_ptr = FFI.mitie_tokenize(text)
|
43
|
+
tokens = read_tokens(tokens_ptr)
|
44
|
+
tokens.each { |t| t.force_encoding(text.encoding) }
|
45
|
+
tokens
|
46
|
+
ensure
|
47
|
+
FFI.mitie_free(tokens_ptr) if tokens_ptr
|
48
|
+
end
|
49
|
+
|
50
|
+
def tokenize_file(filename)
|
51
|
+
tokens_ptr = FFI.mitie_tokenize_file(filename)
|
52
|
+
read_tokens(tokens_ptr)
|
53
|
+
ensure
|
54
|
+
FFI.mitie_free(tokens_ptr) if tokens_ptr
|
55
|
+
end
|
56
|
+
|
57
|
+
private
|
58
|
+
|
59
|
+
def read_tokens(tokens_ptr)
|
60
|
+
i = 0
|
61
|
+
tokens = []
|
62
|
+
loop do
|
63
|
+
token = (tokens_ptr + i * Fiddle::SIZEOF_VOIDP).ptr
|
64
|
+
break if token.null?
|
65
|
+
tokens << token.to_s
|
66
|
+
i += 1
|
67
|
+
end
|
68
|
+
tokens
|
69
|
+
end
|
70
|
+
end
|
39
71
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: mitie
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.0
|
4
|
+
version: 0.2.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Andrew Kane
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2022-06-
|
11
|
+
date: 2022-06-12 00:00:00.000000000 Z
|
12
12
|
dependencies: []
|
13
13
|
description:
|
14
14
|
email: andrew@ankane.org
|