blingfire 0.1.5 → 0.1.6

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 2585ca0684a0d6af6beaae99ab7b5250eb928c5200cbfb6274dcd2cbc914dccc
4
- data.tar.gz: fe4681abb0c63e7d8fd0e8a777abaed95e578bbb0cfd7238bacbe9beecd3d07c
3
+ metadata.gz: fa9f7ebf09e5745b6865d4c5645b36f42b7c484ed667ce493febee80887e89ad
4
+ data.tar.gz: 7fb6e8716138c35396964081b8ad60d7c01db88e71e4fbe219d0ea27a6eff94b
5
5
  SHA512:
6
- metadata.gz: 9a02c3f87eea7ea989f73032388f4abe69b30055f1ea7a96aa302385a2f6038eb833cffe00b15e5b69124d7a11bf8dd031067a4a16b415999955b8ce393f25d4
7
- data.tar.gz: e696df2343cab6ee82af95bedcfc45457b69287113f819514851ef5f943a2f0f8c165b21abff0f944dcd96d4cab45b8faa1190940bf36c294c9c98d55bd9d980
6
+ metadata.gz: 4f1e20f3520f4be17af2df5db4b3e05b169756f1667dccb08dbafb89ee07f633862d16649fa8e3be505073ca58a11989cc5c9b99d3f571aae2eaff3d4053dd8c
7
+ data.tar.gz: 8f70ecb06ddcad466508a0debe63fd99e9a5cbab35e3702296c8ff522ffbb5075ad52b2ce68d2f316de6685256b756ff098bc5a2b29e807652b89de84f9b4661
data/CHANGELOG.md CHANGED
@@ -1,3 +1,8 @@
1
+ ## 0.1.6 (2021-06-07)
2
+
3
+ - Updated Bling Fire to 0.1.7
4
+ - Added `prefix` option
5
+
1
6
  ## 0.1.5 (2021-03-14)
2
7
 
3
8
  - Updated Bling Fire to 0.1.5
data/README.md CHANGED
@@ -76,6 +76,12 @@ Get offsets for ids
76
76
  ids, start_offsets, end_offsets = model.text_to_ids_with_offsets(text)
77
77
  ```
78
78
 
79
+ Disable prefix space
80
+
81
+ ```ruby
82
+ model = BlingFire.load_model("roberta.bin", prefix: false)
83
+ ```
84
+
79
85
  ## History
80
86
 
81
87
  View the [changelog](https://github.com/ankane/blingfire/blob/master/CHANGELOG.md)
data/lib/blingfire.rb CHANGED
@@ -38,8 +38,8 @@ module BlingFire
38
38
  FFI.GetBlingFireTokVersion
39
39
  end
40
40
 
41
- def load_model(path)
42
- Model.new(path)
41
+ def load_model(path, **options)
42
+ Model.new(path, **options)
43
43
  end
44
44
 
45
45
  def text_to_words(text)
@@ -126,6 +126,12 @@ module BlingFire
126
126
  encode_utf8(out.to_str(out_size))
127
127
  end
128
128
 
129
+ def change_settings_dummy_prefix(model, value)
130
+ # use opposite of value
131
+ ret = FFI.SetNoDummyPrefix(model, value ? 0 : 1)
132
+ raise Error, "Bad status: #{ret}" if ret != 1
133
+ end
134
+
129
135
  private
130
136
 
131
137
  def check_status(ret, ptr)
@@ -171,7 +177,7 @@ module BlingFire
171
177
  # TODO see if more efficient to store next_pos in variable
172
178
  pos = 0
173
179
  text.each_char.with_index do |c, i|
174
- while pos == start_bytes[starts.size]
180
+ while pos == start_bytes[starts.size] || start_bytes[starts.size] == -1
175
181
  starts << i
176
182
  end
177
183
  pos += c.bytesize
data/lib/blingfire/ffi.rb CHANGED
@@ -10,6 +10,8 @@ module BlingFire
10
10
  raise e
11
11
  end
12
12
 
13
+ typealias "bool", "char"
14
+
13
15
  # https://github.com/microsoft/BlingFire/blob/master/blingfiretools/blingfiretokdll/blingfiretokdll.cpp
14
16
 
15
17
  # version
@@ -40,5 +42,8 @@ module BlingFire
40
42
 
41
43
  # free model
42
44
  extern "int FreeModel(void* ModelPtr)"
45
+
46
+ # prefix
47
+ extern "int SetNoDummyPrefix(void* ModelPtr, bool fNoDummyPrefix)"
43
48
  end
44
49
  end
@@ -1,11 +1,15 @@
1
1
  module BlingFire
2
2
  class Model
3
- def initialize(path = nil)
3
+ def initialize(path = nil, prefix: nil)
4
4
  @handle = nil
5
5
  if path
6
6
  raise Error, "Model not found" unless File.exist?(path)
7
7
  @handle = FFI.LoadModel(path)
8
8
  ObjectSpace.define_finalizer(self, self.class.finalize(@handle))
9
+
10
+ BlingFire.change_settings_dummy_prefix(@handle, prefix) unless prefix.nil?
11
+ else
12
+ raise Error, "prefix option requires path" unless prefix.nil?
9
13
  end
10
14
  end
11
15
 
@@ -1,3 +1,3 @@
1
1
  module BlingFire
2
- VERSION = "0.1.5"
2
+ VERSION = "0.1.6"
3
3
  end
Binary file
Binary file
Binary file
Binary file
Binary file
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: blingfire
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.5
4
+ version: 0.1.6
5
5
  platform: ruby
6
6
  authors:
7
7
  - Andrew Kane
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2021-03-15 00:00:00.000000000 Z
11
+ date: 2021-06-07 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler