blingfire 0.1.6 → 0.1.8
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +9 -0
- data/README.md +22 -8
- data/lib/blingfire/ffi.rb +3 -0
- data/lib/blingfire/model.rb +8 -0
- data/lib/blingfire/version.rb +1 -1
- data/lib/blingfire.rb +11 -2
- data/vendor/blingfiretokdll.dll +0 -0
- data/vendor/libblingfiretokdll.arm64.dylib +0 -0
- data/vendor/libblingfiretokdll.arm64.so +0 -0
- data/vendor/libblingfiretokdll.dylib +0 -0
- data/vendor/libblingfiretokdll.so +0 -0
- metadata +6 -48
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 466b528fca6404415ad072b597cbb6fc96628a325504aba0e7becc4793b35b3f
|
4
|
+
data.tar.gz: c1e02e1f8c48578c6407ac9f7d7a996ecd78205ffdcaa3158144012f61d8922e
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: cbfd60e8e54b42a1e7258afcebb51d27029bf66a2969daf4badc62f3117d7113b8d02f99c3445513dc37f08698205c21ba97787ba761512166257438b73d3956
|
7
|
+
data.tar.gz: 1aa595a55116d5e084d31e087180ed04392d1b7d91932a875d53cab88bc72b3a1ae179c15094b9679aa9fa78dc4419afc70f144ba971343d0c465a7842ca634e
|
data/CHANGELOG.md
CHANGED
data/README.md
CHANGED
@@ -1,15 +1,15 @@
|
|
1
|
-
# Bling Fire
|
1
|
+
# Bling Fire Ruby
|
2
2
|
|
3
3
|
[Bling Fire](https://github.com/microsoft/BlingFire) - high speed text tokenization - for Ruby
|
4
4
|
|
5
|
-
[![Build Status](https://github.com/ankane/blingfire/workflows/build/badge.svg?branch=master)](https://github.com/ankane/blingfire/actions)
|
5
|
+
[![Build Status](https://github.com/ankane/blingfire-ruby/workflows/build/badge.svg?branch=master)](https://github.com/ankane/blingfire-ruby/actions)
|
6
6
|
|
7
7
|
## Installation
|
8
8
|
|
9
9
|
Add this line to your application’s Gemfile:
|
10
10
|
|
11
11
|
```ruby
|
12
|
-
gem 'blingfire'
|
12
|
+
gem "blingfire"
|
13
13
|
```
|
14
14
|
|
15
15
|
## Getting Started
|
@@ -82,24 +82,38 @@ Disable prefix space
|
|
82
82
|
model = BlingFire.load_model("roberta.bin", prefix: false)
|
83
83
|
```
|
84
84
|
|
85
|
+
## Ids to Text [experimental]
|
86
|
+
|
87
|
+
Load a model
|
88
|
+
|
89
|
+
```ruby
|
90
|
+
model = BlingFire.load_model("bert_base_tok.i2w")
|
91
|
+
```
|
92
|
+
|
93
|
+
Convert ids to text
|
94
|
+
|
95
|
+
```ruby
|
96
|
+
model.ids_to_text(ids)
|
97
|
+
```
|
98
|
+
|
85
99
|
## History
|
86
100
|
|
87
|
-
View the [changelog](https://github.com/ankane/blingfire/blob/master/CHANGELOG.md)
|
101
|
+
View the [changelog](https://github.com/ankane/blingfire-ruby/blob/master/CHANGELOG.md)
|
88
102
|
|
89
103
|
## Contributing
|
90
104
|
|
91
105
|
Everyone is encouraged to help improve this project. Here are a few ways you can help:
|
92
106
|
|
93
|
-
- [Report bugs](https://github.com/ankane/blingfire/issues)
|
94
|
-
- Fix bugs and [submit pull requests](https://github.com/ankane/blingfire/pulls)
|
107
|
+
- [Report bugs](https://github.com/ankane/blingfire-ruby/issues)
|
108
|
+
- Fix bugs and [submit pull requests](https://github.com/ankane/blingfire-ruby/pulls)
|
95
109
|
- Write, clarify, or fix documentation
|
96
110
|
- Suggest or add new features
|
97
111
|
|
98
112
|
To get started with development:
|
99
113
|
|
100
114
|
```sh
|
101
|
-
git clone https://github.com/ankane/blingfire.git
|
102
|
-
cd blingfire
|
115
|
+
git clone https://github.com/ankane/blingfire-ruby.git
|
116
|
+
cd blingfire-ruby
|
103
117
|
bundle install
|
104
118
|
bundle exec rake vendor:all download:models
|
105
119
|
bundle exec rake test
|
data/lib/blingfire/ffi.rb
CHANGED
@@ -45,5 +45,8 @@ module BlingFire
|
|
45
45
|
|
46
46
|
# prefix
|
47
47
|
extern "int SetNoDummyPrefix(void* ModelPtr, bool fNoDummyPrefix)"
|
48
|
+
|
49
|
+
# ids to text
|
50
|
+
extern "int IdsToText(void* ModelPtr, int32_t * pIdsArr, int IdsCount, char * pOutUtf8Str, int MaxOutUtf8StrByteCount, bool SkipSpecialTokens)"
|
48
51
|
end
|
49
52
|
end
|
data/lib/blingfire/model.rb
CHANGED
@@ -61,6 +61,14 @@ module BlingFire
|
|
61
61
|
end
|
62
62
|
end
|
63
63
|
|
64
|
+
def ids_to_text(ids, skip_special_tokens: true, output_buffer_size: nil)
|
65
|
+
if @handle
|
66
|
+
BlingFire.ids_to_text(@handle, ids, skip_special_tokens: skip_special_tokens, output_buffer_size: output_buffer_size)
|
67
|
+
else
|
68
|
+
raise "Not implemented"
|
69
|
+
end
|
70
|
+
end
|
71
|
+
|
64
72
|
def to_ptr
|
65
73
|
@handle
|
66
74
|
end
|
data/lib/blingfire/version.rb
CHANGED
data/lib/blingfire.rb
CHANGED
@@ -15,13 +15,13 @@ module BlingFire
|
|
15
15
|
if Gem.win_platform?
|
16
16
|
"blingfiretokdll.dll"
|
17
17
|
elsif RbConfig::CONFIG["host_os"] =~ /darwin/i
|
18
|
-
if RbConfig::CONFIG["host_cpu"] =~ /arm/i
|
18
|
+
if RbConfig::CONFIG["host_cpu"] =~ /arm|aarch64/i
|
19
19
|
"libblingfiretokdll.arm64.dylib"
|
20
20
|
else
|
21
21
|
"libblingfiretokdll.dylib"
|
22
22
|
end
|
23
23
|
else
|
24
|
-
if RbConfig::CONFIG["host_cpu"] =~ /aarch64/i
|
24
|
+
if RbConfig::CONFIG["host_cpu"] =~ /arm|aarch64/i
|
25
25
|
"libblingfiretokdll.arm64.so"
|
26
26
|
else
|
27
27
|
"libblingfiretokdll.so"
|
@@ -113,6 +113,15 @@ module BlingFire
|
|
113
113
|
[result].concat(unpack_offsets(start_offsets, end_offsets, result, text))
|
114
114
|
end
|
115
115
|
|
116
|
+
def ids_to_text(model, ids, skip_special_tokens: true, output_buffer_size: nil)
|
117
|
+
output_buffer_size ||= ids.size * 32
|
118
|
+
c_ids = Fiddle::Pointer[ids.pack("i*")]
|
119
|
+
out = Fiddle::Pointer.malloc(output_buffer_size)
|
120
|
+
out_size = FFI.IdsToText(model, c_ids, ids.size, out, output_buffer_size, skip_special_tokens ? 1 : 0)
|
121
|
+
check_status out_size, out
|
122
|
+
encode_utf8(out.to_str(out_size - 1))
|
123
|
+
end
|
124
|
+
|
116
125
|
def free_model(model)
|
117
126
|
FFI.FreeModel(model)
|
118
127
|
end
|
data/vendor/blingfiretokdll.dll
CHANGED
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
metadata
CHANGED
@@ -1,59 +1,17 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: blingfire
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.6
|
4
|
+
version: 0.1.8
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Andrew Kane
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
12
|
-
dependencies:
|
13
|
-
- !ruby/object:Gem::Dependency
|
14
|
-
name: bundler
|
15
|
-
requirement: !ruby/object:Gem::Requirement
|
16
|
-
requirements:
|
17
|
-
- - ">="
|
18
|
-
- !ruby/object:Gem::Version
|
19
|
-
version: '0'
|
20
|
-
type: :development
|
21
|
-
prerelease: false
|
22
|
-
version_requirements: !ruby/object:Gem::Requirement
|
23
|
-
requirements:
|
24
|
-
- - ">="
|
25
|
-
- !ruby/object:Gem::Version
|
26
|
-
version: '0'
|
27
|
-
- !ruby/object:Gem::Dependency
|
28
|
-
name: rake
|
29
|
-
requirement: !ruby/object:Gem::Requirement
|
30
|
-
requirements:
|
31
|
-
- - ">="
|
32
|
-
- !ruby/object:Gem::Version
|
33
|
-
version: '0'
|
34
|
-
type: :development
|
35
|
-
prerelease: false
|
36
|
-
version_requirements: !ruby/object:Gem::Requirement
|
37
|
-
requirements:
|
38
|
-
- - ">="
|
39
|
-
- !ruby/object:Gem::Version
|
40
|
-
version: '0'
|
41
|
-
- !ruby/object:Gem::Dependency
|
42
|
-
name: minitest
|
43
|
-
requirement: !ruby/object:Gem::Requirement
|
44
|
-
requirements:
|
45
|
-
- - ">="
|
46
|
-
- !ruby/object:Gem::Version
|
47
|
-
version: '5'
|
48
|
-
type: :development
|
49
|
-
prerelease: false
|
50
|
-
version_requirements: !ruby/object:Gem::Requirement
|
51
|
-
requirements:
|
52
|
-
- - ">="
|
53
|
-
- !ruby/object:Gem::Version
|
54
|
-
version: '5'
|
11
|
+
date: 2023-02-02 00:00:00.000000000 Z
|
12
|
+
dependencies: []
|
55
13
|
description:
|
56
|
-
email: andrew@
|
14
|
+
email: andrew@ankane.org
|
57
15
|
executables: []
|
58
16
|
extensions: []
|
59
17
|
extra_rdoc_files: []
|
@@ -71,7 +29,7 @@ files:
|
|
71
29
|
- vendor/libblingfiretokdll.arm64.so
|
72
30
|
- vendor/libblingfiretokdll.dylib
|
73
31
|
- vendor/libblingfiretokdll.so
|
74
|
-
homepage: https://github.com/ankane/blingfire
|
32
|
+
homepage: https://github.com/ankane/blingfire-ruby
|
75
33
|
licenses:
|
76
34
|
- MIT
|
77
35
|
metadata: {}
|
@@ -90,7 +48,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
90
48
|
- !ruby/object:Gem::Version
|
91
49
|
version: '0'
|
92
50
|
requirements: []
|
93
|
-
rubygems_version: 3.
|
51
|
+
rubygems_version: 3.4.1
|
94
52
|
signing_key:
|
95
53
|
specification_version: 4
|
96
54
|
summary: High speed text tokenization for Ruby
|