mitie 0.2.0 → 0.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +5 -0
- data/README.md +2 -2
- data/lib/mitie/ffi.rb +1 -0
- data/lib/mitie/text_categorizer.rb +2 -1
- data/lib/mitie/text_categorizer_trainer.rb +2 -1
- data/lib/mitie/version.rb +1 -1
- data/lib/mitie.rb +32 -0
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: dc2b7bdba2fba6b335ab9750efab8766190e618c4b8e2542ff6409f6727ec8b2
|
4
|
+
data.tar.gz: 03d85b928082a04b46209694c8e1b294e5c4205057a26b5cc65e695bbe3564a9
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 4092a2dc005bb76527429454c301179c63f0eeee4913a3a8f56190e13d7f7551ee3bbfcd1098a6c335cf3b886ec0dee4e52f5c062975a4a058d117229f45b340
|
7
|
+
data.tar.gz: 066e5800a520b16002388088fc367939a26738bd638631d3b86ff9074d508cf5cd051154fbbf7be129f07f82c97a851caf7a6486e6fff2fb7f6a4b252d426ec0
|
data/CHANGELOG.md
CHANGED
data/README.md
CHANGED
@@ -183,7 +183,7 @@ trainer = Mitie::TextCategorizerTrainer.new("total_word_feature_extractor.dat")
|
|
183
183
|
Add labeled text to the trainer
|
184
184
|
|
185
185
|
```ruby
|
186
|
-
trainer.add(
|
186
|
+
trainer.add("This is super cool", "positive")
|
187
187
|
```
|
188
188
|
|
189
189
|
Train the model
|
@@ -207,7 +207,7 @@ model = Mitie::TextCategorizer.new("text_categorization_model.dat")
|
|
207
207
|
Categorize text
|
208
208
|
|
209
209
|
```ruby
|
210
|
-
model.categorize(
|
210
|
+
model.categorize("What a super nice day")
|
211
211
|
```
|
212
212
|
|
213
213
|
## Deployment
|
data/lib/mitie/ffi.rb
CHANGED
@@ -14,6 +14,7 @@ module Mitie
|
|
14
14
|
|
15
15
|
extern "void mitie_free(void* object)"
|
16
16
|
extern "char** mitie_tokenize(const char* text)"
|
17
|
+
extern "char** mitie_tokenize_file(const char* filename)"
|
17
18
|
extern "char** mitie_tokenize_with_offsets(const char* text, unsigned long** token_offsets)"
|
18
19
|
|
19
20
|
# ner
|
@@ -14,7 +14,8 @@ module Mitie
|
|
14
14
|
ObjectSpace.define_finalizer(self, self.class.finalize(@pointer))
|
15
15
|
end
|
16
16
|
|
17
|
-
def categorize(tokens)
|
17
|
+
def categorize(text)
|
18
|
+
tokens = text.is_a?(Array) ? text : Mitie.tokenize(text)
|
18
19
|
tokens_pointer = Utils.array_to_pointer(tokens)
|
19
20
|
text_tag = Fiddle::Pointer.malloc(Fiddle::SIZEOF_VOIDP)
|
20
21
|
text_score = Fiddle::Pointer.malloc(Fiddle::SIZEOF_DOUBLE)
|
@@ -7,7 +7,8 @@ module Mitie
|
|
7
7
|
ObjectSpace.define_finalizer(self, self.class.finalize(@pointer))
|
8
8
|
end
|
9
9
|
|
10
|
-
def add(tokens, label)
|
10
|
+
def add(text, label)
|
11
|
+
tokens = text.is_a?(Array) ? text : Mitie.tokenize(text)
|
11
12
|
tokens_pointer = Utils.array_to_pointer(tokens)
|
12
13
|
FFI.mitie_add_text_categorizer_labeled_text(@pointer, tokens_pointer, label)
|
13
14
|
end
|
data/lib/mitie/version.rb
CHANGED
data/lib/mitie.rb
CHANGED
@@ -36,4 +36,36 @@ module Mitie
|
|
36
36
|
|
37
37
|
# friendlier error message
|
38
38
|
autoload :FFI, "mitie/ffi"
|
39
|
+
|
40
|
+
class << self
|
41
|
+
def tokenize(text)
|
42
|
+
tokens_ptr = FFI.mitie_tokenize(text)
|
43
|
+
tokens = read_tokens(tokens_ptr)
|
44
|
+
tokens.each { |t| t.force_encoding(text.encoding) }
|
45
|
+
tokens
|
46
|
+
ensure
|
47
|
+
FFI.mitie_free(tokens_ptr) if tokens_ptr
|
48
|
+
end
|
49
|
+
|
50
|
+
def tokenize_file(filename)
|
51
|
+
tokens_ptr = FFI.mitie_tokenize_file(filename)
|
52
|
+
read_tokens(tokens_ptr)
|
53
|
+
ensure
|
54
|
+
FFI.mitie_free(tokens_ptr) if tokens_ptr
|
55
|
+
end
|
56
|
+
|
57
|
+
private
|
58
|
+
|
59
|
+
def read_tokens(tokens_ptr)
|
60
|
+
i = 0
|
61
|
+
tokens = []
|
62
|
+
loop do
|
63
|
+
token = (tokens_ptr + i * Fiddle::SIZEOF_VOIDP).ptr
|
64
|
+
break if token.null?
|
65
|
+
tokens << token.to_s
|
66
|
+
i += 1
|
67
|
+
end
|
68
|
+
tokens
|
69
|
+
end
|
70
|
+
end
|
39
71
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: mitie
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.0
|
4
|
+
version: 0.2.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Andrew Kane
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2022-06-
|
11
|
+
date: 2022-06-12 00:00:00.000000000 Z
|
12
12
|
dependencies: []
|
13
13
|
description:
|
14
14
|
email: andrew@ankane.org
|