blingfire 0.1.5 → 0.1.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 2585ca0684a0d6af6beaae99ab7b5250eb928c5200cbfb6274dcd2cbc914dccc
4
- data.tar.gz: fe4681abb0c63e7d8fd0e8a777abaed95e578bbb0cfd7238bacbe9beecd3d07c
3
+ metadata.gz: fa9f7ebf09e5745b6865d4c5645b36f42b7c484ed667ce493febee80887e89ad
4
+ data.tar.gz: 7fb6e8716138c35396964081b8ad60d7c01db88e71e4fbe219d0ea27a6eff94b
5
5
  SHA512:
6
- metadata.gz: 9a02c3f87eea7ea989f73032388f4abe69b30055f1ea7a96aa302385a2f6038eb833cffe00b15e5b69124d7a11bf8dd031067a4a16b415999955b8ce393f25d4
7
- data.tar.gz: e696df2343cab6ee82af95bedcfc45457b69287113f819514851ef5f943a2f0f8c165b21abff0f944dcd96d4cab45b8faa1190940bf36c294c9c98d55bd9d980
6
+ metadata.gz: 4f1e20f3520f4be17af2df5db4b3e05b169756f1667dccb08dbafb89ee07f633862d16649fa8e3be505073ca58a11989cc5c9b99d3f571aae2eaff3d4053dd8c
7
+ data.tar.gz: 8f70ecb06ddcad466508a0debe63fd99e9a5cbab35e3702296c8ff522ffbb5075ad52b2ce68d2f316de6685256b756ff098bc5a2b29e807652b89de84f9b4661
data/CHANGELOG.md CHANGED
@@ -1,3 +1,8 @@
1
+ ## 0.1.6 (2021-06-07)
2
+
3
+ - Updated Bling Fire to 0.1.7
4
+ - Added `prefix` option
5
+
1
6
  ## 0.1.5 (2021-03-14)
2
7
 
3
8
  - Updated Bling Fire to 0.1.5
data/README.md CHANGED
@@ -76,6 +76,12 @@ Get offsets for ids
76
76
  ids, start_offsets, end_offsets = model.text_to_ids_with_offsets(text)
77
77
  ```
78
78
 
79
+ Disable prefix space
80
+
81
+ ```ruby
82
+ model = BlingFire.load_model("roberta.bin", prefix: false)
83
+ ```
84
+
79
85
  ## History
80
86
 
81
87
  View the [changelog](https://github.com/ankane/blingfire/blob/master/CHANGELOG.md)
data/lib/blingfire.rb CHANGED
@@ -38,8 +38,8 @@ module BlingFire
38
38
  FFI.GetBlingFireTokVersion
39
39
  end
40
40
 
41
- def load_model(path)
42
- Model.new(path)
41
+ def load_model(path, **options)
42
+ Model.new(path, **options)
43
43
  end
44
44
 
45
45
  def text_to_words(text)
@@ -126,6 +126,12 @@ module BlingFire
126
126
  encode_utf8(out.to_str(out_size))
127
127
  end
128
128
 
129
+ def change_settings_dummy_prefix(model, value)
130
+ # use opposite of value
131
+ ret = FFI.SetNoDummyPrefix(model, value ? 0 : 1)
132
+ raise Error, "Bad status: #{ret}" if ret != 1
133
+ end
134
+
129
135
  private
130
136
 
131
137
  def check_status(ret, ptr)
@@ -171,7 +177,7 @@ module BlingFire
171
177
  # TODO see if more efficient to store next_pos in variable
172
178
  pos = 0
173
179
  text.each_char.with_index do |c, i|
174
- while pos == start_bytes[starts.size]
180
+ while pos == start_bytes[starts.size] || start_bytes[starts.size] == -1
175
181
  starts << i
176
182
  end
177
183
  pos += c.bytesize
data/lib/blingfire/ffi.rb CHANGED
@@ -10,6 +10,8 @@ module BlingFire
10
10
  raise e
11
11
  end
12
12
 
13
+ typealias "bool", "char"
14
+
13
15
  # https://github.com/microsoft/BlingFire/blob/master/blingfiretools/blingfiretokdll/blingfiretokdll.cpp
14
16
 
15
17
  # version
@@ -40,5 +42,8 @@ module BlingFire
40
42
 
41
43
  # free model
42
44
  extern "int FreeModel(void* ModelPtr)"
45
+
46
+ # prefix
47
+ extern "int SetNoDummyPrefix(void* ModelPtr, bool fNoDummyPrefix)"
43
48
  end
44
49
  end
data/lib/blingfire/model.rb CHANGED
@@ -1,11 +1,15 @@
1
1
  module BlingFire
2
2
  class Model
3
- def initialize(path = nil)
3
+ def initialize(path = nil, prefix: nil)
4
4
  @handle = nil
5
5
  if path
6
6
  raise Error, "Model not found" unless File.exist?(path)
7
7
  @handle = FFI.LoadModel(path)
8
8
  ObjectSpace.define_finalizer(self, self.class.finalize(@handle))
9
+
10
+ BlingFire.change_settings_dummy_prefix(@handle, prefix) unless prefix.nil?
11
+ else
12
+ raise Error, "prefix option requires path" unless prefix.nil?
9
13
  end
10
14
  end
11
15
 
data/lib/blingfire/version.rb CHANGED
@@ -1,3 +1,3 @@
1
1
  module BlingFire
2
- VERSION = "0.1.5"
2
+ VERSION = "0.1.6"
3
3
  end
Binary file
Binary file
Binary file
Binary file
Binary file
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: blingfire
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.5
4
+ version: 0.1.6
5
5
  platform: ruby
6
6
  authors:
7
7
  - Andrew Kane
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2021-03-15 00:00:00.000000000 Z
11
+ date: 2021-06-07 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler