blingfire 0.1.6 → 0.1.7

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: fa9f7ebf09e5745b6865d4c5645b36f42b7c484ed667ce493febee80887e89ad
4
- data.tar.gz: 7fb6e8716138c35396964081b8ad60d7c01db88e71e4fbe219d0ea27a6eff94b
3
+ metadata.gz: 4c6b1fa4c1af8020140b480f8c3579bd62232b25d3cbe470a5149ecac7279a8c
4
+ data.tar.gz: ea2c66e829368d3858759edb0e0b5644e2f1abc14458d6a8a11e618aacb33951
5
5
  SHA512:
6
- metadata.gz: 4f1e20f3520f4be17af2df5db4b3e05b169756f1667dccb08dbafb89ee07f633862d16649fa8e3be505073ca58a11989cc5c9b99d3f571aae2eaff3d4053dd8c
7
- data.tar.gz: 8f70ecb06ddcad466508a0debe63fd99e9a5cbab35e3702296c8ff522ffbb5075ad52b2ce68d2f316de6685256b756ff098bc5a2b29e807652b89de84f9b4661
6
+ metadata.gz: 714e09d5190152a1009c33000651675405a3f6402f1be056ecce74d73c1aa16c265d3c68245cb0118b5fe6e693d35f21b64b372b8f9d91ae445f063d92c22678
7
+ data.tar.gz: 3ec81c37b184e4363b4faf16d89a9869362c0655e19be451540dada56f32cba607fc7fe94dbe9210da3a864dc22dd692fc161619d2c80e6cdf0847b8f8029305
data/CHANGELOG.md CHANGED
@@ -1,3 +1,8 @@
1
+ ## 0.1.7 (2021-09-24)
2
+
3
+ - Updated Bling Fire to 0.1.8
4
+ - Added `ids_to_text` method
5
+
1
6
  ## 0.1.6 (2021-06-07)
2
7
 
3
8
  - Updated Bling Fire to 0.1.7
data/README.md CHANGED
@@ -82,6 +82,20 @@ Disable prefix space
82
82
  model = BlingFire.load_model("roberta.bin", prefix: false)
83
83
  ```
84
84
 
85
+ ## Ids to Text [experimental]
86
+
87
+ Load a model
88
+
89
+ ```ruby
90
+ model = BlingFire.load_model("bert_base_tok.i2w")
91
+ ```
92
+
93
+ Convert ids to text
94
+
95
+ ```ruby
96
+ model.ids_to_text(ids)
97
+ ```
98
+
85
99
  ## History
86
100
 
87
101
  View the [changelog](https://github.com/ankane/blingfire/blob/master/CHANGELOG.md)
data/lib/blingfire/ffi.rb CHANGED
@@ -45,5 +45,8 @@ module BlingFire
45
45
 
46
46
  # prefix
47
47
  extern "int SetNoDummyPrefix(void* ModelPtr, bool fNoDummyPrefix)"
48
+
49
+ # ids to text
50
+ extern "int IdsToText(void* ModelPtr, int32_t * pIdsArr, int IdsCount, char * pOutUtf8Str, int MaxOutUtf8StrByteCount, bool SkipSpecialTokens)"
48
51
  end
49
52
  end
data/lib/blingfire/model.rb CHANGED
@@ -61,6 +61,14 @@ module BlingFire
61
61
  end
62
62
  end
63
63
 
64
+ def ids_to_text(ids, skip_special_tokens: true, output_buffer_size: nil)
65
+ if @handle
66
+ BlingFire.ids_to_text(@handle, ids, skip_special_tokens: skip_special_tokens, output_buffer_size: output_buffer_size)
67
+ else
68
+ raise "Not implemented"
69
+ end
70
+ end
71
+
64
72
  def to_ptr
65
73
  @handle
66
74
  end
data/lib/blingfire/version.rb CHANGED
@@ -1,3 +1,3 @@
1
1
  module BlingFire
2
- VERSION = "0.1.6"
2
+ VERSION = "0.1.7"
3
3
  end
data/lib/blingfire.rb CHANGED
@@ -113,6 +113,15 @@ module BlingFire
113
113
  [result].concat(unpack_offsets(start_offsets, end_offsets, result, text))
114
114
  end
115
115
 
116
+ def ids_to_text(model, ids, skip_special_tokens: true, output_buffer_size: nil)
117
+ output_buffer_size ||= ids.size * 32
118
+ c_ids = Fiddle::Pointer[ids.pack("i*")]
119
+ out = Fiddle::Pointer.malloc(output_buffer_size)
120
+ out_size = FFI.IdsToText(model, c_ids, ids.size, out, output_buffer_size, skip_special_tokens ? 1 : 0)
121
+ check_status out_size, out
122
+ encode_utf8(out.to_str(out_size - 1))
123
+ end
124
+
116
125
  def free_model(model)
117
126
  FFI.FreeModel(model)
118
127
  end
Binary file
Binary file
Binary file
Binary file
Binary file
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: blingfire
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.6
4
+ version: 0.1.7
5
5
  platform: ruby
6
6
  authors:
7
7
  - Andrew Kane
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2021-06-07 00:00:00.000000000 Z
11
+ date: 2021-09-25 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -90,7 +90,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
90
90
  - !ruby/object:Gem::Version
91
91
  version: '0'
92
92
  requirements: []
93
- rubygems_version: 3.2.3
93
+ rubygems_version: 3.2.22
94
94
  signing_key:
95
95
  specification_version: 4
96
96
  summary: High speed text tokenization for Ruby