blingfire 0.3.0 → 0.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +5 -0
- data/lib/blingfire/ffi.rb +0 -2
- data/lib/blingfire/model.rb +1 -1
- data/lib/blingfire/version.rb +1 -1
- data/lib/blingfire.rb +19 -19
- metadata +5 -5
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: a8bcd44e1517ca140f88c58672f9ce09644e4498add5763ec41a5551e8c281ab
|
4
|
+
data.tar.gz: 47c29e51a1f442ecc3a033a73d0c4916e8eebd8be194d46389e3cb64a98a1e8a
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 43d1be1d30fcadf809aab8728d8a7d6d7b57128dc4f7e8784072e469e43fd01ff3a463ecc915563d3bc3c84c7265bfad7b854c26759bc6eee0b47196c79c1456
|
7
|
+
data.tar.gz: 6fc78f981153589681ba6016a9d0e2aaa264d68a551b7656db03e353aaa2bd9441ba9653bece7d6f886063ebca25d8006804d54328c207a6b01719a63f664e53
|
data/CHANGELOG.md
CHANGED
data/lib/blingfire/ffi.rb
CHANGED
data/lib/blingfire/model.rb
CHANGED
@@ -4,7 +4,7 @@ module BlingFire
|
|
4
4
|
@handle = nil
|
5
5
|
if path
|
6
6
|
raise Error, "Model not found" unless File.exist?(path)
|
7
|
-
@handle = FFI.LoadModel(path)
|
7
|
+
@handle = FFI.LoadModel(+path)
|
8
8
|
@handle.free = FFI["FreeModel"]
|
9
9
|
|
10
10
|
BlingFire.change_settings_dummy_prefix(@handle, prefix) unless prefix.nil?
|
data/lib/blingfire/version.rb
CHANGED
data/lib/blingfire.rb
CHANGED
@@ -92,20 +92,20 @@ module BlingFire
|
|
92
92
|
|
93
93
|
def text_to_ids(model, text, max_len = nil, unk_id = 0)
|
94
94
|
text = encode_utf8(text.dup) unless text.encoding == Encoding::UTF_8
|
95
|
-
ids = Fiddle::Pointer.malloc((max_len || text.size) * Fiddle::SIZEOF_INT)
|
96
|
-
out_size = FFI.TextToIds(model, text, text.bytesize, ids, ids.size, unk_id)
|
95
|
+
ids = Fiddle::Pointer.malloc((max_len || text.size) * Fiddle::SIZEOF_INT, Fiddle::RUBY_FREE)
|
96
|
+
out_size = FFI.TextToIds(model, +text, text.bytesize, ids, ids.size, unk_id)
|
97
97
|
check_status out_size, ids
|
98
98
|
ids[0, (max_len || out_size) * Fiddle::SIZEOF_INT].unpack("i!*")
|
99
99
|
end
|
100
100
|
|
101
101
|
def text_to_ids_with_offsets(model, text, max_len = nil, unk_id = 0)
|
102
102
|
text = encode_utf8(text.dup) unless text.encoding == Encoding::UTF_8
|
103
|
-
ids = Fiddle::Pointer.malloc((max_len || text.size) * Fiddle::SIZEOF_INT)
|
103
|
+
ids = Fiddle::Pointer.malloc((max_len || text.size) * Fiddle::SIZEOF_INT, Fiddle::RUBY_FREE)
|
104
104
|
|
105
|
-
start_offsets = Fiddle::Pointer.malloc(Fiddle::SIZEOF_INT * ids.size)
|
106
|
-
end_offsets = Fiddle::Pointer.malloc(Fiddle::SIZEOF_INT * ids.size)
|
105
|
+
start_offsets = Fiddle::Pointer.malloc(Fiddle::SIZEOF_INT * ids.size, Fiddle::RUBY_FREE)
|
106
|
+
end_offsets = Fiddle::Pointer.malloc(Fiddle::SIZEOF_INT * ids.size, Fiddle::RUBY_FREE)
|
107
107
|
|
108
|
-
out_size = FFI.TextToIdsWithOffsets(model, text, text.bytesize, ids, start_offsets, end_offsets, ids.size, unk_id)
|
108
|
+
out_size = FFI.TextToIdsWithOffsets(model, +text, text.bytesize, ids, start_offsets, end_offsets, ids.size, unk_id)
|
109
109
|
|
110
110
|
check_status out_size, ids
|
111
111
|
|
@@ -116,8 +116,8 @@ module BlingFire
|
|
116
116
|
def ids_to_text(model, ids, skip_special_tokens: true, output_buffer_size: nil)
|
117
117
|
output_buffer_size ||= ids.size * 32
|
118
118
|
c_ids = Fiddle::Pointer[ids.pack("i*")]
|
119
|
-
out = Fiddle::Pointer.malloc(output_buffer_size)
|
120
|
-
out_size = FFI.IdsToText(model, c_ids, ids.size, out, output_buffer_size, skip_special_tokens
|
119
|
+
out = Fiddle::Pointer.malloc(output_buffer_size, Fiddle::RUBY_FREE)
|
120
|
+
out_size = FFI.IdsToText(model, c_ids, ids.size, out, output_buffer_size, !!skip_special_tokens)
|
121
121
|
check_status out_size, out
|
122
122
|
out_size <= 0 ? "" : encode_utf8(out.to_str(out_size - 1))
|
123
123
|
end
|
@@ -129,15 +129,15 @@ module BlingFire
|
|
129
129
|
def normalize_spaces(text)
|
130
130
|
u_space = 0x20
|
131
131
|
text = encode_utf8(text.dup) unless text.encoding == Encoding::UTF_8
|
132
|
-
out = Fiddle::Pointer.malloc([text.bytesize * 1.5, 20].max)
|
133
|
-
out_size = FFI.NormalizeSpaces(text, text.bytesize, out, out.size, u_space)
|
132
|
+
out = Fiddle::Pointer.malloc([text.bytesize * 1.5, 20].max, Fiddle::RUBY_FREE)
|
133
|
+
out_size = FFI.NormalizeSpaces(+text, text.bytesize, out, out.size, u_space)
|
134
134
|
check_status out_size, out
|
135
135
|
encode_utf8(out.to_str(out_size))
|
136
136
|
end
|
137
137
|
|
138
138
|
def change_settings_dummy_prefix(model, value)
|
139
139
|
# use opposite of value
|
140
|
-
ret = FFI.SetNoDummyPrefix(model, value
|
140
|
+
ret = FFI.SetNoDummyPrefix(model, !value)
|
141
141
|
raise Error, "Bad status: #{ret}" if ret != 1
|
142
142
|
end
|
143
143
|
|
@@ -150,8 +150,8 @@ module BlingFire
|
|
150
150
|
def text_to(text, sep)
|
151
151
|
text = encode_utf8(text.dup) unless text.encoding == Encoding::UTF_8
|
152
152
|
# TODO allocate less, and try again if needed
|
153
|
-
out = Fiddle::Pointer.malloc([text.bytesize * 3, 20].max)
|
154
|
-
out_size = yield(text, out)
|
153
|
+
out = Fiddle::Pointer.malloc([text.bytesize * 3, 20].max, Fiddle::RUBY_FREE)
|
154
|
+
out_size = yield(+text, out)
|
155
155
|
check_status out_size, out
|
156
156
|
encode_utf8(out.to_str(out_size - 1)).split(sep)
|
157
157
|
end
|
@@ -159,12 +159,12 @@ module BlingFire
|
|
159
159
|
def text_to_with_offsets(text, sep)
|
160
160
|
text = encode_utf8(text.dup) unless text.encoding == Encoding::UTF_8
|
161
161
|
# TODO allocate less, and try again if needed
|
162
|
-
out = Fiddle::Pointer.malloc([text.bytesize * 3, 20].max)
|
162
|
+
out = Fiddle::Pointer.malloc([text.bytesize * 3, 20].max, Fiddle::RUBY_FREE)
|
163
163
|
|
164
|
-
start_offsets = Fiddle::Pointer.malloc(Fiddle::SIZEOF_INT * out.size)
|
165
|
-
end_offsets = Fiddle::Pointer.malloc(Fiddle::SIZEOF_INT * out.size)
|
164
|
+
start_offsets = Fiddle::Pointer.malloc(Fiddle::SIZEOF_INT * out.size, Fiddle::RUBY_FREE)
|
165
|
+
end_offsets = Fiddle::Pointer.malloc(Fiddle::SIZEOF_INT * out.size, Fiddle::RUBY_FREE)
|
166
166
|
|
167
|
-
out_size = yield(text, out, start_offsets, end_offsets)
|
167
|
+
out_size = yield(+text, out, start_offsets, end_offsets)
|
168
168
|
|
169
169
|
check_status out_size, out
|
170
170
|
|
@@ -177,8 +177,8 @@ module BlingFire
|
|
177
177
|
end
|
178
178
|
|
179
179
|
def unpack_offsets(start_offsets, end_offsets, result, text)
|
180
|
-
start_bytes = start_offsets.
|
181
|
-
end_bytes = end_offsets.
|
180
|
+
start_bytes = start_offsets.to_str(Fiddle::SIZEOF_INT * result.size).unpack("i*")
|
181
|
+
end_bytes = end_offsets.to_str(Fiddle::SIZEOF_INT * result.size).unpack("i*")
|
182
182
|
starts = []
|
183
183
|
ends = []
|
184
184
|
|
metadata
CHANGED
@@ -1,13 +1,13 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: blingfire
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.3.0
|
4
|
+
version: 0.3.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Andrew Kane
|
8
8
|
bindir: bin
|
9
9
|
cert_chain: []
|
10
|
-
date:
|
10
|
+
date: 1980-01-02 00:00:00.000000000 Z
|
11
11
|
dependencies:
|
12
12
|
- !ruby/object:Gem::Dependency
|
13
13
|
name: fiddle
|
@@ -15,14 +15,14 @@ dependencies:
|
|
15
15
|
requirements:
|
16
16
|
- - ">="
|
17
17
|
- !ruby/object:Gem::Version
|
18
|
-
version:
|
18
|
+
version: 1.1.7
|
19
19
|
type: :runtime
|
20
20
|
prerelease: false
|
21
21
|
version_requirements: !ruby/object:Gem::Requirement
|
22
22
|
requirements:
|
23
23
|
- - ">="
|
24
24
|
- !ruby/object:Gem::Version
|
25
|
-
version:
|
25
|
+
version: 1.1.7
|
26
26
|
email: andrew@ankane.org
|
27
27
|
executables: []
|
28
28
|
extensions: []
|
@@ -59,7 +59,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
59
59
|
- !ruby/object:Gem::Version
|
60
60
|
version: '0'
|
61
61
|
requirements: []
|
62
|
-
rubygems_version: 3.6.
|
62
|
+
rubygems_version: 3.6.7
|
63
63
|
specification_version: 4
|
64
64
|
summary: High speed text tokenization for Ruby
|
65
65
|
test_files: []
|