blingfire 0.1.6 → 0.1.7
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +5 -0
- data/README.md +14 -0
- data/lib/blingfire/ffi.rb +3 -0
- data/lib/blingfire/model.rb +8 -0
- data/lib/blingfire/version.rb +1 -1
- data/lib/blingfire.rb +9 -0
- data/vendor/blingfiretokdll.dll +0 -0
- data/vendor/libblingfiretokdll.arm64.dylib +0 -0
- data/vendor/libblingfiretokdll.arm64.so +0 -0
- data/vendor/libblingfiretokdll.dylib +0 -0
- data/vendor/libblingfiretokdll.so +0 -0
- metadata +3 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 4c6b1fa4c1af8020140b480f8c3579bd62232b25d3cbe470a5149ecac7279a8c
|
4
|
+
data.tar.gz: ea2c66e829368d3858759edb0e0b5644e2f1abc14458d6a8a11e618aacb33951
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 714e09d5190152a1009c33000651675405a3f6402f1be056ecce74d73c1aa16c265d3c68245cb0118b5fe6e693d35f21b64b372b8f9d91ae445f063d92c22678
|
7
|
+
data.tar.gz: 3ec81c37b184e4363b4faf16d89a9869362c0655e19be451540dada56f32cba607fc7fe94dbe9210da3a864dc22dd692fc161619d2c80e6cdf0847b8f8029305
|
data/CHANGELOG.md
CHANGED
data/README.md
CHANGED
@@ -82,6 +82,20 @@ Disable prefix space
|
|
82
82
|
model = BlingFire.load_model("roberta.bin", prefix: false)
|
83
83
|
```
|
84
84
|
|
85
|
+
## Ids to Text [experimental]
|
86
|
+
|
87
|
+
Load a model
|
88
|
+
|
89
|
+
```ruby
|
90
|
+
model = BlingFire.load_model("bert_base_tok.i2w")
|
91
|
+
```
|
92
|
+
|
93
|
+
Convert ids to text
|
94
|
+
|
95
|
+
```ruby
|
96
|
+
model.ids_to_text(ids)
|
97
|
+
```
|
98
|
+
|
85
99
|
## History
|
86
100
|
|
87
101
|
View the [changelog](https://github.com/ankane/blingfire/blob/master/CHANGELOG.md)
|
data/lib/blingfire/ffi.rb
CHANGED
@@ -45,5 +45,8 @@ module BlingFire
|
|
45
45
|
|
46
46
|
# prefix
|
47
47
|
extern "int SetNoDummyPrefix(void* ModelPtr, bool fNoDummyPrefix)"
|
48
|
+
|
49
|
+
# ids to text
|
50
|
+
extern "int IdsToText(void* ModelPtr, int32_t * pIdsArr, int IdsCount, char * pOutUtf8Str, int MaxOutUtf8StrByteCount, bool SkipSpecialTokens)"
|
48
51
|
end
|
49
52
|
end
|
data/lib/blingfire/model.rb
CHANGED
@@ -61,6 +61,14 @@ module BlingFire
|
|
61
61
|
end
|
62
62
|
end
|
63
63
|
|
64
|
+
def ids_to_text(ids, skip_special_tokens: true, output_buffer_size: nil)
|
65
|
+
if @handle
|
66
|
+
BlingFire.ids_to_text(@handle, ids, skip_special_tokens: skip_special_tokens, output_buffer_size: output_buffer_size)
|
67
|
+
else
|
68
|
+
raise "Not implemented"
|
69
|
+
end
|
70
|
+
end
|
71
|
+
|
64
72
|
def to_ptr
|
65
73
|
@handle
|
66
74
|
end
|
data/lib/blingfire/version.rb
CHANGED
data/lib/blingfire.rb
CHANGED
@@ -113,6 +113,15 @@ module BlingFire
|
|
113
113
|
[result].concat(unpack_offsets(start_offsets, end_offsets, result, text))
|
114
114
|
end
|
115
115
|
|
116
|
+
def ids_to_text(model, ids, skip_special_tokens: true, output_buffer_size: nil)
|
117
|
+
output_buffer_size ||= ids.size * 32
|
118
|
+
c_ids = Fiddle::Pointer[ids.pack("i*")]
|
119
|
+
out = Fiddle::Pointer.malloc(output_buffer_size)
|
120
|
+
out_size = FFI.IdsToText(model, c_ids, ids.size, out, output_buffer_size, skip_special_tokens ? 1 : 0)
|
121
|
+
check_status out_size, out
|
122
|
+
encode_utf8(out.to_str(out_size - 1))
|
123
|
+
end
|
124
|
+
|
116
125
|
def free_model(model)
|
117
126
|
FFI.FreeModel(model)
|
118
127
|
end
|
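data/vendor/blingfiretokdll.dll
CHANGED
Binary file
|
data/vendor/libblingfiretokdll.arm64.dylib
CHANGED
Binary file
|
data/vendor/libblingfiretokdll.arm64.so
CHANGED
Binary file
|
data/vendor/libblingfiretokdll.dylib
CHANGED
Binary file
|
data/vendor/libblingfiretokdll.so
CHANGED
Binary file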
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: blingfire
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.6
|
4
|
+
version: 0.1.7
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Andrew Kane
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2021-
|
11
|
+
date: 2021-09-25 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -90,7 +90,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
90
90
|
- !ruby/object:Gem::Version
|
91
91
|
version: '0'
|
92
92
|
requirements: []
|
93
|
-
rubygems_version: 3.2.
|
93
|
+
rubygems_version: 3.2.22
|
94
94
|
signing_key:
|
95
95
|
specification_version: 4
|
96
96
|
summary: High speed text tokenization for Ruby
|