blingfire 0.1.6 → 0.1.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +9 -0
- data/README.md +22 -8
- data/lib/blingfire/ffi.rb +3 -0
- data/lib/blingfire/model.rb +8 -0
- data/lib/blingfire/version.rb +1 -1
- data/lib/blingfire.rb +11 -2
- data/vendor/blingfiretokdll.dll +0 -0
- data/vendor/libblingfiretokdll.arm64.dylib +0 -0
- data/vendor/libblingfiretokdll.arm64.so +0 -0
- data/vendor/libblingfiretokdll.dylib +0 -0
- data/vendor/libblingfiretokdll.so +0 -0
- metadata +6 -48
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 466b528fca6404415ad072b597cbb6fc96628a325504aba0e7becc4793b35b3f
|
4
|
+
data.tar.gz: c1e02e1f8c48578c6407ac9f7d7a996ecd78205ffdcaa3158144012f61d8922e
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: cbfd60e8e54b42a1e7258afcebb51d27029bf66a2969daf4badc62f3117d7113b8d02f99c3445513dc37f08698205c21ba97787ba761512166257438b73d3956
|
7
|
+
data.tar.gz: 1aa595a55116d5e084d31e087180ed04392d1b7d91932a875d53cab88bc72b3a1ae179c15094b9679aa9fa78dc4419afc70f144ba971343d0c465a7842ca634e
|
data/CHANGELOG.md
CHANGED
data/README.md
CHANGED
@@ -1,15 +1,15 @@
|
|
1
|
-
# Bling Fire
|
1
|
+
# Bling Fire Ruby
|
2
2
|
|
3
3
|
[Bling Fire](https://github.com/microsoft/BlingFire) - high speed text tokenization - for Ruby
|
4
4
|
|
5
|
-
[Build Status](https://github.com/ankane/blingfire/actions)
|
5
|
+
[Build Status](https://github.com/ankane/blingfire-ruby/actions)
|
6
6
|
|
7
7
|
## Installation
|
8
8
|
|
9
9
|
Add this line to your application’s Gemfile:
|
10
10
|
|
11
11
|
```ruby
|
12
|
-
gem
|
12
|
+
gem "blingfire"
|
13
13
|
```
|
14
14
|
|
15
15
|
## Getting Started
|
@@ -82,24 +82,38 @@ Disable prefix space
|
|
82
82
|
model = BlingFire.load_model("roberta.bin", prefix: false)
|
83
83
|
```
|
84
84
|
|
85
|
+
## Ids to Text [experimental]
|
86
|
+
|
87
|
+
Load a model
|
88
|
+
|
89
|
+
```ruby
|
90
|
+
model = BlingFire.load_model("bert_base_tok.i2w")
|
91
|
+
```
|
92
|
+
|
93
|
+
Convert ids to text
|
94
|
+
|
95
|
+
```ruby
|
96
|
+
model.ids_to_text(ids)
|
97
|
+
```
|
98
|
+
|
85
99
|
## History
|
86
100
|
|
87
|
-
View the [changelog](https://github.com/ankane/blingfire/blob/master/CHANGELOG.md)
|
101
|
+
View the [changelog](https://github.com/ankane/blingfire-ruby/blob/master/CHANGELOG.md)
|
88
102
|
|
89
103
|
## Contributing
|
90
104
|
|
91
105
|
Everyone is encouraged to help improve this project. Here are a few ways you can help:
|
92
106
|
|
93
|
-
- [Report bugs](https://github.com/ankane/blingfire/issues)
|
94
|
-
- Fix bugs and [submit pull requests](https://github.com/ankane/blingfire/pulls)
|
107
|
+
- [Report bugs](https://github.com/ankane/blingfire-ruby/issues)
|
108
|
+
- Fix bugs and [submit pull requests](https://github.com/ankane/blingfire-ruby/pulls)
|
95
109
|
- Write, clarify, or fix documentation
|
96
110
|
- Suggest or add new features
|
97
111
|
|
98
112
|
To get started with development:
|
99
113
|
|
100
114
|
```sh
|
101
|
-
git clone https://github.com/ankane/blingfire.git
|
102
|
-
cd blingfire
|
115
|
+
git clone https://github.com/ankane/blingfire-ruby.git
|
116
|
+
cd blingfire-ruby
|
103
117
|
bundle install
|
104
118
|
bundle exec rake vendor:all download:models
|
105
119
|
bundle exec rake test
|
data/lib/blingfire/ffi.rb
CHANGED
@@ -45,5 +45,8 @@ module BlingFire
|
|
45
45
|
|
46
46
|
# prefix
|
47
47
|
extern "int SetNoDummyPrefix(void* ModelPtr, bool fNoDummyPrefix)"
|
48
|
+
|
49
|
+
# ids to text
|
50
|
+
extern "int IdsToText(void* ModelPtr, int32_t * pIdsArr, int IdsCount, char * pOutUtf8Str, int MaxOutUtf8StrByteCount, bool SkipSpecialTokens)"
|
48
51
|
end
|
49
52
|
end
|
data/lib/blingfire/model.rb
CHANGED
@@ -61,6 +61,14 @@ module BlingFire
|
|
61
61
|
end
|
62
62
|
end
|
63
63
|
|
64
|
+
def ids_to_text(ids, skip_special_tokens: true, output_buffer_size: nil)
|
65
|
+
if @handle
|
66
|
+
BlingFire.ids_to_text(@handle, ids, skip_special_tokens: skip_special_tokens, output_buffer_size: output_buffer_size)
|
67
|
+
else
|
68
|
+
raise "Not implemented"
|
69
|
+
end
|
70
|
+
end
|
71
|
+
|
64
72
|
def to_ptr
|
65
73
|
@handle
|
66
74
|
end
|
data/lib/blingfire/version.rb
CHANGED
data/lib/blingfire.rb
CHANGED
@@ -15,13 +15,13 @@ module BlingFire
|
|
15
15
|
if Gem.win_platform?
|
16
16
|
"blingfiretokdll.dll"
|
17
17
|
elsif RbConfig::CONFIG["host_os"] =~ /darwin/i
|
18
|
-
if RbConfig::CONFIG["host_cpu"] =~ /arm/i
|
18
|
+
if RbConfig::CONFIG["host_cpu"] =~ /arm|aarch64/i
|
19
19
|
"libblingfiretokdll.arm64.dylib"
|
20
20
|
else
|
21
21
|
"libblingfiretokdll.dylib"
|
22
22
|
end
|
23
23
|
else
|
24
|
-
if RbConfig::CONFIG["host_cpu"] =~ /aarch64/i
|
24
|
+
if RbConfig::CONFIG["host_cpu"] =~ /arm|aarch64/i
|
25
25
|
"libblingfiretokdll.arm64.so"
|
26
26
|
else
|
27
27
|
"libblingfiretokdll.so"
|
@@ -113,6 +113,15 @@ module BlingFire
|
|
113
113
|
[result].concat(unpack_offsets(start_offsets, end_offsets, result, text))
|
114
114
|
end
|
115
115
|
|
116
|
+
def ids_to_text(model, ids, skip_special_tokens: true, output_buffer_size: nil)
|
117
|
+
output_buffer_size ||= ids.size * 32
|
118
|
+
c_ids = Fiddle::Pointer[ids.pack("i*")]
|
119
|
+
out = Fiddle::Pointer.malloc(output_buffer_size)
|
120
|
+
out_size = FFI.IdsToText(model, c_ids, ids.size, out, output_buffer_size, skip_special_tokens ? 1 : 0)
|
121
|
+
check_status out_size, out
|
122
|
+
encode_utf8(out.to_str(out_size - 1))
|
123
|
+
end
|
124
|
+
|
116
125
|
def free_model(model)
|
117
126
|
FFI.FreeModel(model)
|
118
127
|
end
|
data/vendor/blingfiretokdll.dll
CHANGED
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
metadata
CHANGED
@@ -1,59 +1,17 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: blingfire
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.6
|
4
|
+
version: 0.1.8
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Andrew Kane
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
12
|
-
dependencies:
|
13
|
-
- !ruby/object:Gem::Dependency
|
14
|
-
name: bundler
|
15
|
-
requirement: !ruby/object:Gem::Requirement
|
16
|
-
requirements:
|
17
|
-
- - ">="
|
18
|
-
- !ruby/object:Gem::Version
|
19
|
-
version: '0'
|
20
|
-
type: :development
|
21
|
-
prerelease: false
|
22
|
-
version_requirements: !ruby/object:Gem::Requirement
|
23
|
-
requirements:
|
24
|
-
- - ">="
|
25
|
-
- !ruby/object:Gem::Version
|
26
|
-
version: '0'
|
27
|
-
- !ruby/object:Gem::Dependency
|
28
|
-
name: rake
|
29
|
-
requirement: !ruby/object:Gem::Requirement
|
30
|
-
requirements:
|
31
|
-
- - ">="
|
32
|
-
- !ruby/object:Gem::Version
|
33
|
-
version: '0'
|
34
|
-
type: :development
|
35
|
-
prerelease: false
|
36
|
-
version_requirements: !ruby/object:Gem::Requirement
|
37
|
-
requirements:
|
38
|
-
- - ">="
|
39
|
-
- !ruby/object:Gem::Version
|
40
|
-
version: '0'
|
41
|
-
- !ruby/object:Gem::Dependency
|
42
|
-
name: minitest
|
43
|
-
requirement: !ruby/object:Gem::Requirement
|
44
|
-
requirements:
|
45
|
-
- - ">="
|
46
|
-
- !ruby/object:Gem::Version
|
47
|
-
version: '5'
|
48
|
-
type: :development
|
49
|
-
prerelease: false
|
50
|
-
version_requirements: !ruby/object:Gem::Requirement
|
51
|
-
requirements:
|
52
|
-
- - ">="
|
53
|
-
- !ruby/object:Gem::Version
|
54
|
-
version: '5'
|
11
|
+
date: 2023-02-02 00:00:00.000000000 Z
|
12
|
+
dependencies: []
|
55
13
|
description:
|
56
|
-
email: andrew@
|
14
|
+
email: andrew@ankane.org
|
57
15
|
executables: []
|
58
16
|
extensions: []
|
59
17
|
extra_rdoc_files: []
|
@@ -71,7 +29,7 @@ files:
|
|
71
29
|
- vendor/libblingfiretokdll.arm64.so
|
72
30
|
- vendor/libblingfiretokdll.dylib
|
73
31
|
- vendor/libblingfiretokdll.so
|
74
|
-
homepage: https://github.com/ankane/blingfire
|
32
|
+
homepage: https://github.com/ankane/blingfire-ruby
|
75
33
|
licenses:
|
76
34
|
- MIT
|
77
35
|
metadata: {}
|
@@ -90,7 +48,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
90
48
|
- !ruby/object:Gem::Version
|
91
49
|
version: '0'
|
92
50
|
requirements: []
|
93
|
-
rubygems_version: 3.
|
51
|
+
rubygems_version: 3.4.1
|
94
52
|
signing_key:
|
95
53
|
specification_version: 4
|
96
54
|
summary: High speed text tokenization for Ruby
|