blingfire 0.3.0 → 0.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 0a63930943ea4fe6a3dcbf4dd30219c7986b5f9e1cdcff9e6ada001a00399761
4
- data.tar.gz: d14acf9c98a6ffc8d1917d3d29fb14be4f68ce9dd7519a7e4d79a8ba4dda3a3d
3
+ metadata.gz: a8bcd44e1517ca140f88c58672f9ce09644e4498add5763ec41a5551e8c281ab
4
+ data.tar.gz: 47c29e51a1f442ecc3a033a73d0c4916e8eebd8be194d46389e3cb64a98a1e8a
5
5
  SHA512:
6
- metadata.gz: 538838c9d5c59fbd2fcdec2742500d76b9d4cf6a115c673fccf87c84f393af463524e6e0c19bf7bbaafa26237e3a61e96e68732fc59c97d97a7a7605c7e0e63c
7
- data.tar.gz: dc75f4af27a2d62d3a0edba001dc99c848c534ea54a2bec32f3fb7e1eadb5ff8e7c2864ce6dd771cf3d7676ed355074f278f18b819f8422566038dc985958daf
6
+ metadata.gz: 43d1be1d30fcadf809aab8728d8a7d6d7b57128dc4f7e8784072e469e43fd01ff3a463ecc915563d3bc3c84c7265bfad7b854c26759bc6eee0b47196c79c1456
7
+ data.tar.gz: 6fc78f981153589681ba6016a9d0e2aaa264d68a551b7656db03e353aaa2bd9441ba9653bece7d6f886063ebca25d8006804d54328c207a6b01719a63f664e53
data/CHANGELOG.md CHANGED
@@ -1,3 +1,8 @@
1
+ ## 0.3.1 (2025-05-04)
2
+
3
+ - Fixed issue with Fiddle 1.1.7+
4
+ - Fixed memory leaks
5
+
1
6
  ## 0.3.0 (2025-04-03)
2
7
 
3
8
  - Dropped support for Ruby < 3.2
data/lib/blingfire/ffi.rb CHANGED
@@ -10,8 +10,6 @@ module BlingFire
10
10
  raise e
11
11
  end
12
12
 
13
- typealias "bool", "char"
14
-
15
13
  # https://github.com/microsoft/BlingFire/blob/master/blingfiretools/blingfiretokdll/blingfiretokdll.cpp
16
14
 
17
15
  # version
data/lib/blingfire/model.rb CHANGED
@@ -4,7 +4,7 @@ module BlingFire
4
4
  @handle = nil
5
5
  if path
6
6
  raise Error, "Model not found" unless File.exist?(path)
7
- @handle = FFI.LoadModel(path)
7
+ @handle = FFI.LoadModel(+path)
8
8
  @handle.free = FFI["FreeModel"]
9
9
 
10
10
  BlingFire.change_settings_dummy_prefix(@handle, prefix) unless prefix.nil?
data/lib/blingfire/version.rb CHANGED
@@ -1,3 +1,3 @@
1
1
  module BlingFire
2
- VERSION = "0.3.0"
2
+ VERSION = "0.3.1"
3
3
  end
data/lib/blingfire.rb CHANGED
@@ -92,20 +92,20 @@ module BlingFire
92
92
 
93
93
  def text_to_ids(model, text, max_len = nil, unk_id = 0)
94
94
  text = encode_utf8(text.dup) unless text.encoding == Encoding::UTF_8
95
- ids = Fiddle::Pointer.malloc((max_len || text.size) * Fiddle::SIZEOF_INT)
96
- out_size = FFI.TextToIds(model, text, text.bytesize, ids, ids.size, unk_id)
95
+ ids = Fiddle::Pointer.malloc((max_len || text.size) * Fiddle::SIZEOF_INT, Fiddle::RUBY_FREE)
96
+ out_size = FFI.TextToIds(model, +text, text.bytesize, ids, ids.size, unk_id)
97
97
  check_status out_size, ids
98
98
  ids[0, (max_len || out_size) * Fiddle::SIZEOF_INT].unpack("i!*")
99
99
  end
100
100
 
101
101
  def text_to_ids_with_offsets(model, text, max_len = nil, unk_id = 0)
102
102
  text = encode_utf8(text.dup) unless text.encoding == Encoding::UTF_8
103
- ids = Fiddle::Pointer.malloc((max_len || text.size) * Fiddle::SIZEOF_INT)
103
+ ids = Fiddle::Pointer.malloc((max_len || text.size) * Fiddle::SIZEOF_INT, Fiddle::RUBY_FREE)
104
104
 
105
- start_offsets = Fiddle::Pointer.malloc(Fiddle::SIZEOF_INT * ids.size)
106
- end_offsets = Fiddle::Pointer.malloc(Fiddle::SIZEOF_INT * ids.size)
105
+ start_offsets = Fiddle::Pointer.malloc(Fiddle::SIZEOF_INT * ids.size, Fiddle::RUBY_FREE)
106
+ end_offsets = Fiddle::Pointer.malloc(Fiddle::SIZEOF_INT * ids.size, Fiddle::RUBY_FREE)
107
107
 
108
- out_size = FFI.TextToIdsWithOffsets(model, text, text.bytesize, ids, start_offsets, end_offsets, ids.size, unk_id)
108
+ out_size = FFI.TextToIdsWithOffsets(model, +text, text.bytesize, ids, start_offsets, end_offsets, ids.size, unk_id)
109
109
 
110
110
  check_status out_size, ids
111
111
 
@@ -116,8 +116,8 @@ module BlingFire
116
116
  def ids_to_text(model, ids, skip_special_tokens: true, output_buffer_size: nil)
117
117
  output_buffer_size ||= ids.size * 32
118
118
  c_ids = Fiddle::Pointer[ids.pack("i*")]
119
- out = Fiddle::Pointer.malloc(output_buffer_size)
120
- out_size = FFI.IdsToText(model, c_ids, ids.size, out, output_buffer_size, skip_special_tokens ? 1 : 0)
119
+ out = Fiddle::Pointer.malloc(output_buffer_size, Fiddle::RUBY_FREE)
120
+ out_size = FFI.IdsToText(model, c_ids, ids.size, out, output_buffer_size, !!skip_special_tokens)
121
121
  check_status out_size, out
122
122
  out_size <= 0 ? "" : encode_utf8(out.to_str(out_size - 1))
123
123
  end
@@ -129,15 +129,15 @@ module BlingFire
129
129
  def normalize_spaces(text)
130
130
  u_space = 0x20
131
131
  text = encode_utf8(text.dup) unless text.encoding == Encoding::UTF_8
132
- out = Fiddle::Pointer.malloc([text.bytesize * 1.5, 20].max)
133
- out_size = FFI.NormalizeSpaces(text, text.bytesize, out, out.size, u_space)
132
+ out = Fiddle::Pointer.malloc([text.bytesize * 1.5, 20].max, Fiddle::RUBY_FREE)
133
+ out_size = FFI.NormalizeSpaces(+text, text.bytesize, out, out.size, u_space)
134
134
  check_status out_size, out
135
135
  encode_utf8(out.to_str(out_size))
136
136
  end
137
137
 
138
138
  def change_settings_dummy_prefix(model, value)
139
139
  # use opposite of value
140
- ret = FFI.SetNoDummyPrefix(model, value ? 0 : 1)
140
+ ret = FFI.SetNoDummyPrefix(model, !value)
141
141
  raise Error, "Bad status: #{ret}" if ret != 1
142
142
  end
143
143
 
@@ -150,8 +150,8 @@ module BlingFire
150
150
  def text_to(text, sep)
151
151
  text = encode_utf8(text.dup) unless text.encoding == Encoding::UTF_8
152
152
  # TODO allocate less, and try again if needed
153
- out = Fiddle::Pointer.malloc([text.bytesize * 3, 20].max)
154
- out_size = yield(text, out)
153
+ out = Fiddle::Pointer.malloc([text.bytesize * 3, 20].max, Fiddle::RUBY_FREE)
154
+ out_size = yield(+text, out)
155
155
  check_status out_size, out
156
156
  encode_utf8(out.to_str(out_size - 1)).split(sep)
157
157
  end
@@ -159,12 +159,12 @@ module BlingFire
159
159
  def text_to_with_offsets(text, sep)
160
160
  text = encode_utf8(text.dup) unless text.encoding == Encoding::UTF_8
161
161
  # TODO allocate less, and try again if needed
162
- out = Fiddle::Pointer.malloc([text.bytesize * 3, 20].max)
162
+ out = Fiddle::Pointer.malloc([text.bytesize * 3, 20].max, Fiddle::RUBY_FREE)
163
163
 
164
- start_offsets = Fiddle::Pointer.malloc(Fiddle::SIZEOF_INT * out.size)
165
- end_offsets = Fiddle::Pointer.malloc(Fiddle::SIZEOF_INT * out.size)
164
+ start_offsets = Fiddle::Pointer.malloc(Fiddle::SIZEOF_INT * out.size, Fiddle::RUBY_FREE)
165
+ end_offsets = Fiddle::Pointer.malloc(Fiddle::SIZEOF_INT * out.size, Fiddle::RUBY_FREE)
166
166
 
167
- out_size = yield(text, out, start_offsets, end_offsets)
167
+ out_size = yield(+text, out, start_offsets, end_offsets)
168
168
 
169
169
  check_status out_size, out
170
170
 
@@ -177,8 +177,8 @@ module BlingFire
177
177
  end
178
178
 
179
179
  def unpack_offsets(start_offsets, end_offsets, result, text)
180
- start_bytes = start_offsets.to_s(Fiddle::SIZEOF_INT * result.size).unpack("i*")
181
- end_bytes = end_offsets.to_s(Fiddle::SIZEOF_INT * result.size).unpack("i*")
180
+ start_bytes = start_offsets.to_str(Fiddle::SIZEOF_INT * result.size).unpack("i*")
181
+ end_bytes = end_offsets.to_str(Fiddle::SIZEOF_INT * result.size).unpack("i*")
182
182
  starts = []
183
183
  ends = []
184
184
 
metadata CHANGED
@@ -1,13 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: blingfire
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.0
4
+ version: 0.3.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Andrew Kane
8
8
  bindir: bin
9
9
  cert_chain: []
10
- date: 2025-04-03 00:00:00.000000000 Z
10
+ date: 1980-01-02 00:00:00.000000000 Z
11
11
  dependencies:
12
12
  - !ruby/object:Gem::Dependency
13
13
  name: fiddle
@@ -15,14 +15,14 @@ dependencies:
15
15
  requirements:
16
16
  - - ">="
17
17
  - !ruby/object:Gem::Version
18
- version: '0'
18
+ version: 1.1.7
19
19
  type: :runtime
20
20
  prerelease: false
21
21
  version_requirements: !ruby/object:Gem::Requirement
22
22
  requirements:
23
23
  - - ">="
24
24
  - !ruby/object:Gem::Version
25
- version: '0'
25
+ version: 1.1.7
26
26
  email: andrew@ankane.org
27
27
  executables: []
28
28
  extensions: []
@@ -59,7 +59,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
59
59
  - !ruby/object:Gem::Version
60
60
  version: '0'
61
61
  requirements: []
62
- rubygems_version: 3.6.2
62
+ rubygems_version: 3.6.7
63
63
  specification_version: 4
64
64
  summary: High speed text tokenization for Ruby
65
65
  test_files: []