blingfire 0.1.6 → 0.1.7

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the contents of those package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: fa9f7ebf09e5745b6865d4c5645b36f42b7c484ed667ce493febee80887e89ad
4
- data.tar.gz: 7fb6e8716138c35396964081b8ad60d7c01db88e71e4fbe219d0ea27a6eff94b
3
+ metadata.gz: 4c6b1fa4c1af8020140b480f8c3579bd62232b25d3cbe470a5149ecac7279a8c
4
+ data.tar.gz: ea2c66e829368d3858759edb0e0b5644e2f1abc14458d6a8a11e618aacb33951
5
5
  SHA512:
6
- metadata.gz: 4f1e20f3520f4be17af2df5db4b3e05b169756f1667dccb08dbafb89ee07f633862d16649fa8e3be505073ca58a11989cc5c9b99d3f571aae2eaff3d4053dd8c
7
- data.tar.gz: 8f70ecb06ddcad466508a0debe63fd99e9a5cbab35e3702296c8ff522ffbb5075ad52b2ce68d2f316de6685256b756ff098bc5a2b29e807652b89de84f9b4661
6
+ metadata.gz: 714e09d5190152a1009c33000651675405a3f6402f1be056ecce74d73c1aa16c265d3c68245cb0118b5fe6e693d35f21b64b372b8f9d91ae445f063d92c22678
7
+ data.tar.gz: 3ec81c37b184e4363b4faf16d89a9869362c0655e19be451540dada56f32cba607fc7fe94dbe9210da3a864dc22dd692fc161619d2c80e6cdf0847b8f8029305
data/CHANGELOG.md CHANGED
@@ -1,3 +1,8 @@
1
+ ## 0.1.7 (2021-09-24)
2
+
3
+ - Updated Bling Fire to 0.1.8
4
+ - Added `ids_to_text` method
5
+
1
6
  ## 0.1.6 (2021-06-07)
2
7
 
3
8
  - Updated Bling Fire to 0.1.7
data/README.md CHANGED
@@ -82,6 +82,20 @@ Disable prefix space
82
82
  model = BlingFire.load_model("roberta.bin", prefix: false)
83
83
  ```
84
84
 
85
+ ## Ids to Text [experimental]
86
+
87
+ Load a model
88
+
89
+ ```ruby
90
+ model = BlingFire.load_model("bert_base_tok.i2w")
91
+ ```
92
+
93
+ Convert ids to text
94
+
95
+ ```ruby
96
+ model.ids_to_text(ids)
97
+ ```
98
+
85
99
  ## History
86
100
 
87
101
  View the [changelog](https://github.com/ankane/blingfire/blob/master/CHANGELOG.md)
data/lib/blingfire/ffi.rb CHANGED
@@ -45,5 +45,8 @@ module BlingFire
45
45
 
46
46
  # prefix
47
47
  extern "int SetNoDummyPrefix(void* ModelPtr, bool fNoDummyPrefix)"
48
+
49
+ # ids to text
50
+ extern "int IdsToText(void* ModelPtr, int32_t * pIdsArr, int IdsCount, char * pOutUtf8Str, int MaxOutUtf8StrByteCount, bool SkipSpecialTokens)"
48
51
  end
49
52
  end
@@ -61,6 +61,14 @@ module BlingFire
61
61
  end
62
62
  end
63
63
 
64
+ def ids_to_text(ids, skip_special_tokens: true, output_buffer_size: nil)
65
+ if @handle
66
+ BlingFire.ids_to_text(@handle, ids, skip_special_tokens: skip_special_tokens, output_buffer_size: output_buffer_size)
67
+ else
68
+ raise "Not implemented"
69
+ end
70
+ end
71
+
64
72
  def to_ptr
65
73
  @handle
66
74
  end
@@ -1,3 +1,3 @@
1
1
  module BlingFire
2
- VERSION = "0.1.6"
2
+ VERSION = "0.1.7"
3
3
  end
data/lib/blingfire.rb CHANGED
@@ -113,6 +113,15 @@ module BlingFire
113
113
  [result].concat(unpack_offsets(start_offsets, end_offsets, result, text))
114
114
  end
115
115
 
116
+ def ids_to_text(model, ids, skip_special_tokens: true, output_buffer_size: nil)
117
+ output_buffer_size ||= ids.size * 32
118
+ c_ids = Fiddle::Pointer[ids.pack("i*")]
119
+ out = Fiddle::Pointer.malloc(output_buffer_size)
120
+ out_size = FFI.IdsToText(model, c_ids, ids.size, out, output_buffer_size, skip_special_tokens ? 1 : 0)
121
+ check_status out_size, out
122
+ encode_utf8(out.to_str(out_size - 1))
123
+ end
124
+
116
125
  def free_model(model)
117
126
  FFI.FreeModel(model)
118
127
  end
Binary file
Binary file
Binary file
Binary file
Binary file
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: blingfire
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.6
4
+ version: 0.1.7
5
5
  platform: ruby
6
6
  authors:
7
7
  - Andrew Kane
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2021-06-07 00:00:00.000000000 Z
11
+ date: 2021-09-25 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -90,7 +90,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
90
90
  - !ruby/object:Gem::Version
91
91
  version: '0'
92
92
  requirements: []
93
- rubygems_version: 3.2.3
93
+ rubygems_version: 3.2.22
94
94
  signing_key:
95
95
  specification_version: 4
96
96
  summary: High speed text tokenization for Ruby