blingfire 0.1.5 → 0.1.6
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +5 -0
- data/README.md +6 -0
- data/lib/blingfire.rb +9 -3
- data/lib/blingfire/ffi.rb +5 -0
- data/lib/blingfire/model.rb +5 -1
- data/lib/blingfire/version.rb +1 -1
- data/vendor/blingfiretokdll.dll +0 -0
- data/vendor/libblingfiretokdll.arm64.dylib +0 -0
- data/vendor/libblingfiretokdll.arm64.so +0 -0
- data/vendor/libblingfiretokdll.dylib +0 -0
- data/vendor/libblingfiretokdll.so +0 -0
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: fa9f7ebf09e5745b6865d4c5645b36f42b7c484ed667ce493febee80887e89ad
|
4
|
+
data.tar.gz: 7fb6e8716138c35396964081b8ad60d7c01db88e71e4fbe219d0ea27a6eff94b
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 4f1e20f3520f4be17af2df5db4b3e05b169756f1667dccb08dbafb89ee07f633862d16649fa8e3be505073ca58a11989cc5c9b99d3f571aae2eaff3d4053dd8c
|
7
|
+
data.tar.gz: 8f70ecb06ddcad466508a0debe63fd99e9a5cbab35e3702296c8ff522ffbb5075ad52b2ce68d2f316de6685256b756ff098bc5a2b29e807652b89de84f9b4661
|
data/CHANGELOG.md
CHANGED
data/README.md
CHANGED
@@ -76,6 +76,12 @@ Get offsets for ids
|
|
76
76
|
ids, start_offsets, end_offsets = model.text_to_ids_with_offsets(text)
|
77
77
|
```
|
78
78
|
|
79
|
+
Disable prefix space
|
80
|
+
|
81
|
+
```ruby
|
82
|
+
model = BlingFire.load_model("roberta.bin", prefix: false)
|
83
|
+
```
|
84
|
+
|
79
85
|
## History
|
80
86
|
|
81
87
|
View the [changelog](https://github.com/ankane/blingfire/blob/master/CHANGELOG.md)
|
data/lib/blingfire.rb
CHANGED
@@ -38,8 +38,8 @@ module BlingFire
|
|
38
38
|
FFI.GetBlingFireTokVersion
|
39
39
|
end
|
40
40
|
|
41
|
-
def load_model(path)
|
42
|
-
Model.new(path)
|
41
|
+
def load_model(path, **options)
|
42
|
+
Model.new(path, **options)
|
43
43
|
end
|
44
44
|
|
45
45
|
def text_to_words(text)
|
@@ -126,6 +126,12 @@ module BlingFire
|
|
126
126
|
encode_utf8(out.to_str(out_size))
|
127
127
|
end
|
128
128
|
|
129
|
+
def change_settings_dummy_prefix(model, value)
|
130
|
+
# use opposite of value
|
131
|
+
ret = FFI.SetNoDummyPrefix(model, value ? 0 : 1)
|
132
|
+
raise Error, "Bad status: #{ret}" if ret != 1
|
133
|
+
end
|
134
|
+
|
129
135
|
private
|
130
136
|
|
131
137
|
def check_status(ret, ptr)
|
@@ -171,7 +177,7 @@ module BlingFire
|
|
171
177
|
# TODO see if more efficient to store next_pos in variable
|
172
178
|
pos = 0
|
173
179
|
text.each_char.with_index do |c, i|
|
174
|
-
while pos == start_bytes[starts.size]
|
180
|
+
while pos == start_bytes[starts.size] || start_bytes[starts.size] == -1
|
175
181
|
starts << i
|
176
182
|
end
|
177
183
|
pos += c.bytesize
|
data/lib/blingfire/ffi.rb
CHANGED
@@ -10,6 +10,8 @@ module BlingFire
|
|
10
10
|
raise e
|
11
11
|
end
|
12
12
|
|
13
|
+
typealias "bool", "char"
|
14
|
+
|
13
15
|
# https://github.com/microsoft/BlingFire/blob/master/blingfiretools/blingfiretokdll/blingfiretokdll.cpp
|
14
16
|
|
15
17
|
# version
|
@@ -40,5 +42,8 @@ module BlingFire
|
|
40
42
|
|
41
43
|
# free model
|
42
44
|
extern "int FreeModel(void* ModelPtr)"
|
45
|
+
|
46
|
+
# prefix
|
47
|
+
extern "int SetNoDummyPrefix(void* ModelPtr, bool fNoDummyPrefix)"
|
43
48
|
end
|
44
49
|
end
|
data/lib/blingfire/model.rb
CHANGED
@@ -1,11 +1,15 @@
|
|
1
1
|
module BlingFire
|
2
2
|
class Model
|
3
|
-
def initialize(path = nil)
|
3
|
+
def initialize(path = nil, prefix: nil)
|
4
4
|
@handle = nil
|
5
5
|
if path
|
6
6
|
raise Error, "Model not found" unless File.exist?(path)
|
7
7
|
@handle = FFI.LoadModel(path)
|
8
8
|
ObjectSpace.define_finalizer(self, self.class.finalize(@handle))
|
9
|
+
|
10
|
+
BlingFire.change_settings_dummy_prefix(@handle, prefix) unless prefix.nil?
|
11
|
+
else
|
12
|
+
raise Error, "prefix option requires path" unless prefix.nil?
|
9
13
|
end
|
10
14
|
end
|
11
15
|
|
data/lib/blingfire/version.rb
CHANGED
data/vendor/blingfiretokdll.dll
CHANGED
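Binary file
|
data/vendor/libblingfiretokdll.arm64.dylib
CHANGED
Binary file
|
data/vendor/libblingfiretokdll.arm64.so
CHANGED
Binary file
|
data/vendor/libblingfiretokdll.dylib
CHANGED
Binary file
|
data/vendor/libblingfiretokdll.so
CHANGED
Binary file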
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: blingfire
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.5
|
4
|
+
version: 0.1.6
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Andrew Kane
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2021-
|
11
|
+
date: 2021-06-07 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|