blingfire 0.1.6 → 0.1.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +5 -0
- data/README.md +14 -0
- data/lib/blingfire/ffi.rb +3 -0
- data/lib/blingfire/model.rb +8 -0
- data/lib/blingfire/version.rb +1 -1
- data/lib/blingfire.rb +9 -0
- data/vendor/blingfiretokdll.dll +0 -0
- data/vendor/libblingfiretokdll.arm64.dylib +0 -0
- data/vendor/libblingfiretokdll.arm64.so +0 -0
- data/vendor/libblingfiretokdll.dylib +0 -0
- data/vendor/libblingfiretokdll.so +0 -0
- metadata +3 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 4c6b1fa4c1af8020140b480f8c3579bd62232b25d3cbe470a5149ecac7279a8c
|
4
|
+
data.tar.gz: ea2c66e829368d3858759edb0e0b5644e2f1abc14458d6a8a11e618aacb33951
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 714e09d5190152a1009c33000651675405a3f6402f1be056ecce74d73c1aa16c265d3c68245cb0118b5fe6e693d35f21b64b372b8f9d91ae445f063d92c22678
|
7
|
+
data.tar.gz: 3ec81c37b184e4363b4faf16d89a9869362c0655e19be451540dada56f32cba607fc7fe94dbe9210da3a864dc22dd692fc161619d2c80e6cdf0847b8f8029305
|
data/CHANGELOG.md
CHANGED
data/README.md
CHANGED
@@ -82,6 +82,20 @@ Disable prefix space
|
|
82
82
|
model = BlingFire.load_model("roberta.bin", prefix: false)
|
83
83
|
```
|
84
84
|
|
85
|
+
## Ids to Text [experimental]
|
86
|
+
|
87
|
+
Load a model
|
88
|
+
|
89
|
+
```ruby
|
90
|
+
model = BlingFire.load_model("bert_base_tok.i2w")
|
91
|
+
```
|
92
|
+
|
93
|
+
Convert ids to text
|
94
|
+
|
95
|
+
```ruby
|
96
|
+
model.ids_to_text(ids)
|
97
|
+
```
|
98
|
+
|
85
99
|
## History
|
86
100
|
|
87
101
|
View the [changelog](https://github.com/ankane/blingfire/blob/master/CHANGELOG.md)
|
data/lib/blingfire/ffi.rb
CHANGED
@@ -45,5 +45,8 @@ module BlingFire
|
|
45
45
|
|
46
46
|
# prefix
|
47
47
|
extern "int SetNoDummyPrefix(void* ModelPtr, bool fNoDummyPrefix)"
|
48
|
+
|
49
|
+
# ids to text
|
50
|
+
extern "int IdsToText(void* ModelPtr, int32_t * pIdsArr, int IdsCount, char * pOutUtf8Str, int MaxOutUtf8StrByteCount, bool SkipSpecialTokens)"
|
48
51
|
end
|
49
52
|
end
|
data/lib/blingfire/model.rb
CHANGED
@@ -61,6 +61,14 @@ module BlingFire
|
|
61
61
|
end
|
62
62
|
end
|
63
63
|
|
64
|
+
def ids_to_text(ids, skip_special_tokens: true, output_buffer_size: nil)
|
65
|
+
if @handle
|
66
|
+
BlingFire.ids_to_text(@handle, ids, skip_special_tokens: skip_special_tokens, output_buffer_size: output_buffer_size)
|
67
|
+
else
|
68
|
+
raise "Not implemented"
|
69
|
+
end
|
70
|
+
end
|
71
|
+
|
64
72
|
def to_ptr
|
65
73
|
@handle
|
66
74
|
end
|
data/lib/blingfire/version.rb
CHANGED
data/lib/blingfire.rb
CHANGED
@@ -113,6 +113,15 @@ module BlingFire
|
|
113
113
|
[result].concat(unpack_offsets(start_offsets, end_offsets, result, text))
|
114
114
|
end
|
115
115
|
|
116
|
+
def ids_to_text(model, ids, skip_special_tokens: true, output_buffer_size: nil)
|
117
|
+
output_buffer_size ||= ids.size * 32
|
118
|
+
c_ids = Fiddle::Pointer[ids.pack("i*")]
|
119
|
+
out = Fiddle::Pointer.malloc(output_buffer_size)
|
120
|
+
out_size = FFI.IdsToText(model, c_ids, ids.size, out, output_buffer_size, skip_special_tokens ? 1 : 0)
|
121
|
+
check_status out_size, out
|
122
|
+
encode_utf8(out.to_str(out_size - 1))
|
123
|
+
end
|
124
|
+
|
116
125
|
def free_model(model)
|
117
126
|
FFI.FreeModel(model)
|
118
127
|
end
|
data/vendor/blingfiretokdll.dll
CHANGED
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: blingfire
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.6
|
4
|
+
version: 0.1.7
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Andrew Kane
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2021-
|
11
|
+
date: 2021-09-25 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -90,7 +90,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
90
90
|
- !ruby/object:Gem::Version
|
91
91
|
version: '0'
|
92
92
|
requirements: []
|
93
|
-
rubygems_version: 3.2.
|
93
|
+
rubygems_version: 3.2.22
|
94
94
|
signing_key:
|
95
95
|
specification_version: 4
|
96
96
|
summary: High speed text tokenization for Ruby
|