tiktoken_ruby 0.0.8 → 0.0.11.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.vscode/settings.json +5 -0
- data/Cargo.lock +120 -134
- data/Gemfile.lock +40 -40
- data/Rakefile +6 -0
- data/ext/tiktoken_ruby/Cargo.toml +3 -3
- data/ext/tiktoken_ruby/src/core_bpe_wrapper.rs +17 -14
- data/ext/tiktoken_ruby/src/lib.rs +6 -0
- data/lib/tiktoken_ruby/version.rb +1 -1
- data/lib/tiktoken_ruby.rb +8 -2
- data/script/release +43 -0
- metadata +11 -10
- data/tiktoken_ruby.gemspec +0 -33
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 923d3291c75ea3e0d93b45c9a50e11323b421c3e6c93f140f8c5b64e708bc203
|
|
4
|
+
data.tar.gz: 2a1f843387a971b4b9735e52abf58471381bc592895af99b1b4091723b246266
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 98aa2b547a129a5377a838ddfae4df66aee8de87fea916787283dbd2227066d2f99808aa0b71a522e27b58dd59b214c000cc3b933ebf9b9ff2aad56c6cb43536
|
|
7
|
+
data.tar.gz: 3beb0aead95f22b024cbe4e7d105698a1c0dbacc247956d7c91dcf0e468103a577a4a0bb6bf8cb2402516eb9ad521acb3d3af1090f626ab59380eb886e6c06ec
|
data/Cargo.lock
CHANGED
|
@@ -1,41 +1,41 @@
|
|
|
1
1
|
# This file is automatically @generated by Cargo.
|
|
2
2
|
# It is not intended for manual editing.
|
|
3
|
-
version =
|
|
3
|
+
version = 4
|
|
4
4
|
|
|
5
5
|
[[package]]
|
|
6
6
|
name = "aho-corasick"
|
|
7
|
-
version = "
|
|
7
|
+
version = "1.1.3"
|
|
8
8
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
9
|
-
checksum = "
|
|
9
|
+
checksum = "8e60d3430d3a69478ad0993f19238d2df97c507009a52b3c10addcd7f6bcb916"
|
|
10
10
|
dependencies = [
|
|
11
11
|
"memchr",
|
|
12
12
|
]
|
|
13
13
|
|
|
14
14
|
[[package]]
|
|
15
15
|
name = "anyhow"
|
|
16
|
-
version = "1.0.
|
|
16
|
+
version = "1.0.95"
|
|
17
17
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
18
|
-
checksum = "
|
|
18
|
+
checksum = "34ac096ce696dc2fcabef30516bb13c0a68a11d30131d3df6f04711467681b04"
|
|
19
19
|
|
|
20
20
|
[[package]]
|
|
21
21
|
name = "autocfg"
|
|
22
|
-
version = "1.
|
|
22
|
+
version = "1.4.0"
|
|
23
23
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
24
|
-
checksum = "
|
|
24
|
+
checksum = "ace50bade8e6234aa140d9a2f552bbee1db4d353f69b8217bc503490fc1a9f26"
|
|
25
25
|
|
|
26
26
|
[[package]]
|
|
27
27
|
name = "base64"
|
|
28
|
-
version = "0.21.
|
|
28
|
+
version = "0.21.7"
|
|
29
29
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
30
|
-
checksum = "
|
|
30
|
+
checksum = "9d297deb1925b89f2ccc13d7635fa0714f12c87adce1c75356b39ca9b7178567"
|
|
31
31
|
|
|
32
32
|
[[package]]
|
|
33
33
|
name = "bindgen"
|
|
34
|
-
version = "0.69.
|
|
34
|
+
version = "0.69.5"
|
|
35
35
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
36
|
-
checksum = "
|
|
36
|
+
checksum = "271383c67ccabffb7381723dea0672a673f292304fcb45c01cc648c7a8d58088"
|
|
37
37
|
dependencies = [
|
|
38
|
-
"bitflags
|
|
38
|
+
"bitflags",
|
|
39
39
|
"cexpr",
|
|
40
40
|
"clang-sys",
|
|
41
41
|
"itertools",
|
|
@@ -66,24 +66,17 @@ checksum = "349f9b6a179ed607305526ca489b34ad0a41aed5f7980fa90eb03160b69598fb"
|
|
|
66
66
|
|
|
67
67
|
[[package]]
|
|
68
68
|
name = "bitflags"
|
|
69
|
-
version = "
|
|
70
|
-
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
71
|
-
checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a"
|
|
72
|
-
|
|
73
|
-
[[package]]
|
|
74
|
-
name = "bitflags"
|
|
75
|
-
version = "2.4.0"
|
|
69
|
+
version = "2.6.0"
|
|
76
70
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
77
|
-
checksum = "
|
|
71
|
+
checksum = "b048fb63fd8b5923fc5aa7b340d8e156aec7ec02f0c78fa8a6ddc2613f6f71de"
|
|
78
72
|
|
|
79
73
|
[[package]]
|
|
80
74
|
name = "bstr"
|
|
81
|
-
version = "1.
|
|
75
|
+
version = "1.11.1"
|
|
82
76
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
83
|
-
checksum = "
|
|
77
|
+
checksum = "786a307d683a5bf92e6fd5fd69a7eb613751668d1d8d67d802846dfe367c62c8"
|
|
84
78
|
dependencies = [
|
|
85
79
|
"memchr",
|
|
86
|
-
"once_cell",
|
|
87
80
|
"regex-automata",
|
|
88
81
|
"serde",
|
|
89
82
|
]
|
|
@@ -105,9 +98,9 @@ checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"
|
|
|
105
98
|
|
|
106
99
|
[[package]]
|
|
107
100
|
name = "clang-sys"
|
|
108
|
-
version = "1.
|
|
101
|
+
version = "1.8.1"
|
|
109
102
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
110
|
-
checksum = "
|
|
103
|
+
checksum = "0b023947811758c97c59bf9d1c188fd619ad4718dcaa767947df1cadb14f39f4"
|
|
111
104
|
dependencies = [
|
|
112
105
|
"glob",
|
|
113
106
|
"libc",
|
|
@@ -116,25 +109,26 @@ dependencies = [
|
|
|
116
109
|
|
|
117
110
|
[[package]]
|
|
118
111
|
name = "either"
|
|
119
|
-
version = "1.
|
|
112
|
+
version = "1.13.0"
|
|
120
113
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
121
|
-
checksum = "
|
|
114
|
+
checksum = "60b1af1c220855b6ceac025d3f6ecdd2b7c4894bfe9cd9bda4fbb4bc7c0d4cf0"
|
|
122
115
|
|
|
123
116
|
[[package]]
|
|
124
117
|
name = "fancy-regex"
|
|
125
|
-
version = "0.
|
|
118
|
+
version = "0.13.0"
|
|
126
119
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
127
|
-
checksum = "
|
|
120
|
+
checksum = "531e46835a22af56d1e3b66f04844bed63158bc094a628bec1d321d9b4c44bf2"
|
|
128
121
|
dependencies = [
|
|
129
122
|
"bit-set",
|
|
130
|
-
"regex",
|
|
123
|
+
"regex-automata",
|
|
124
|
+
"regex-syntax",
|
|
131
125
|
]
|
|
132
126
|
|
|
133
127
|
[[package]]
|
|
134
128
|
name = "glob"
|
|
135
|
-
version = "0.3.
|
|
129
|
+
version = "0.3.2"
|
|
136
130
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
137
|
-
checksum = "
|
|
131
|
+
checksum = "a8d1add55171497b4705a648c6b583acafb01d58050a51727785f0b2c8e0a2b2"
|
|
138
132
|
|
|
139
133
|
[[package]]
|
|
140
134
|
name = "itertools"
|
|
@@ -147,9 +141,9 @@ dependencies = [
|
|
|
147
141
|
|
|
148
142
|
[[package]]
|
|
149
143
|
name = "lazy_static"
|
|
150
|
-
version = "1.
|
|
144
|
+
version = "1.5.0"
|
|
151
145
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
152
|
-
checksum = "
|
|
146
|
+
checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe"
|
|
153
147
|
|
|
154
148
|
[[package]]
|
|
155
149
|
name = "lazycell"
|
|
@@ -159,25 +153,25 @@ checksum = "830d08ce1d1d941e6b30645f1a0eb5643013d835ce3779a5fc208261dbe10f55"
|
|
|
159
153
|
|
|
160
154
|
[[package]]
|
|
161
155
|
name = "libc"
|
|
162
|
-
version = "0.2.
|
|
156
|
+
version = "0.2.169"
|
|
163
157
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
164
|
-
checksum = "
|
|
158
|
+
checksum = "b5aba8db14291edd000dfcc4d620c7ebfb122c613afb886ca8803fa4e128a20a"
|
|
165
159
|
|
|
166
160
|
[[package]]
|
|
167
161
|
name = "libloading"
|
|
168
|
-
version = "0.
|
|
162
|
+
version = "0.8.6"
|
|
169
163
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
170
|
-
checksum = "
|
|
164
|
+
checksum = "fc2f4eb4bc735547cfed7c0a4922cbd04a4655978c09b54f1f7b228750664c34"
|
|
171
165
|
dependencies = [
|
|
172
166
|
"cfg-if",
|
|
173
|
-
"
|
|
167
|
+
"windows-targets",
|
|
174
168
|
]
|
|
175
169
|
|
|
176
170
|
[[package]]
|
|
177
171
|
name = "lock_api"
|
|
178
|
-
version = "0.4.
|
|
172
|
+
version = "0.4.12"
|
|
179
173
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
180
|
-
checksum = "
|
|
174
|
+
checksum = "07af8b9cdd281b7915f413fa73f29ebd5d55d0d3f0155584dade1ff18cea1b17"
|
|
181
175
|
dependencies = [
|
|
182
176
|
"autocfg",
|
|
183
177
|
"scopeguard",
|
|
@@ -185,9 +179,9 @@ dependencies = [
|
|
|
185
179
|
|
|
186
180
|
[[package]]
|
|
187
181
|
name = "magnus"
|
|
188
|
-
version = "0.
|
|
182
|
+
version = "0.7.1"
|
|
189
183
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
190
|
-
checksum = "
|
|
184
|
+
checksum = "3d87ae53030f3a22e83879e666cb94e58a7bdf31706878a0ba48752994146dab"
|
|
191
185
|
dependencies = [
|
|
192
186
|
"magnus-macros",
|
|
193
187
|
"rb-sys",
|
|
@@ -208,9 +202,9 @@ dependencies = [
|
|
|
208
202
|
|
|
209
203
|
[[package]]
|
|
210
204
|
name = "memchr"
|
|
211
|
-
version = "2.
|
|
205
|
+
version = "2.7.4"
|
|
212
206
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
213
|
-
checksum = "
|
|
207
|
+
checksum = "78ca9ab1a0babb1e7d5695e3530886289c18cf2f87ec19a575a0abdce112e3a3"
|
|
214
208
|
|
|
215
209
|
[[package]]
|
|
216
210
|
name = "minimal-lexical"
|
|
@@ -228,17 +222,11 @@ dependencies = [
|
|
|
228
222
|
"minimal-lexical",
|
|
229
223
|
]
|
|
230
224
|
|
|
231
|
-
[[package]]
|
|
232
|
-
name = "once_cell"
|
|
233
|
-
version = "1.17.1"
|
|
234
|
-
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
235
|
-
checksum = "b7e5500299e16ebb147ae15a00a942af264cf3688f47923b8fc2cd5858f23ad3"
|
|
236
|
-
|
|
237
225
|
[[package]]
|
|
238
226
|
name = "parking_lot"
|
|
239
|
-
version = "0.12.
|
|
227
|
+
version = "0.12.3"
|
|
240
228
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
241
|
-
checksum = "
|
|
229
|
+
checksum = "f1bf18183cf54e8d6059647fc3063646a1801cf30896933ec2311622cc4b9a27"
|
|
242
230
|
dependencies = [
|
|
243
231
|
"lock_api",
|
|
244
232
|
"parking_lot_core",
|
|
@@ -246,49 +234,49 @@ dependencies = [
|
|
|
246
234
|
|
|
247
235
|
[[package]]
|
|
248
236
|
name = "parking_lot_core"
|
|
249
|
-
version = "0.9.
|
|
237
|
+
version = "0.9.10"
|
|
250
238
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
251
|
-
checksum = "
|
|
239
|
+
checksum = "1e401f977ab385c9e4e3ab30627d6f26d00e2c73eef317493c4ec6d468726cf8"
|
|
252
240
|
dependencies = [
|
|
253
241
|
"cfg-if",
|
|
254
242
|
"libc",
|
|
255
243
|
"redox_syscall",
|
|
256
244
|
"smallvec",
|
|
257
|
-
"windows-
|
|
245
|
+
"windows-targets",
|
|
258
246
|
]
|
|
259
247
|
|
|
260
248
|
[[package]]
|
|
261
249
|
name = "proc-macro2"
|
|
262
|
-
version = "1.0.
|
|
250
|
+
version = "1.0.92"
|
|
263
251
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
264
|
-
checksum = "
|
|
252
|
+
checksum = "37d3544b3f2748c54e147655edb5025752e2303145b5aefb3c3ea2c78b973bb0"
|
|
265
253
|
dependencies = [
|
|
266
254
|
"unicode-ident",
|
|
267
255
|
]
|
|
268
256
|
|
|
269
257
|
[[package]]
|
|
270
258
|
name = "quote"
|
|
271
|
-
version = "1.0.
|
|
259
|
+
version = "1.0.38"
|
|
272
260
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
273
|
-
checksum = "
|
|
261
|
+
checksum = "0e4dccaaaf89514f546c693ddc140f729f958c247918a13380cccc6078391acc"
|
|
274
262
|
dependencies = [
|
|
275
263
|
"proc-macro2",
|
|
276
264
|
]
|
|
277
265
|
|
|
278
266
|
[[package]]
|
|
279
267
|
name = "rb-sys"
|
|
280
|
-
version = "0.9.
|
|
268
|
+
version = "0.9.106"
|
|
281
269
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
282
|
-
checksum = "
|
|
270
|
+
checksum = "17b6efdbc8c1a22cb8b5d7ead0237c16c362c9ef6fbdc09e2d1040615b0f4cd0"
|
|
283
271
|
dependencies = [
|
|
284
272
|
"rb-sys-build",
|
|
285
273
|
]
|
|
286
274
|
|
|
287
275
|
[[package]]
|
|
288
276
|
name = "rb-sys-build"
|
|
289
|
-
version = "0.9.
|
|
277
|
+
version = "0.9.106"
|
|
290
278
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
291
|
-
checksum = "
|
|
279
|
+
checksum = "e1d88c51e52f8636a5efc24ec5987056e64e48a91ed2a1af96cb5564686cc10f"
|
|
292
280
|
dependencies = [
|
|
293
281
|
"bindgen",
|
|
294
282
|
"lazy_static",
|
|
@@ -307,35 +295,41 @@ checksum = "a35802679f07360454b418a5d1735c89716bde01d35b1560fc953c1415a0b3bb"
|
|
|
307
295
|
|
|
308
296
|
[[package]]
|
|
309
297
|
name = "redox_syscall"
|
|
310
|
-
version = "0.
|
|
298
|
+
version = "0.5.8"
|
|
311
299
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
312
|
-
checksum = "
|
|
300
|
+
checksum = "03a862b389f93e68874fbf580b9de08dd02facb9a788ebadaf4a3fd33cf58834"
|
|
313
301
|
dependencies = [
|
|
314
|
-
"bitflags
|
|
302
|
+
"bitflags",
|
|
315
303
|
]
|
|
316
304
|
|
|
317
305
|
[[package]]
|
|
318
306
|
name = "regex"
|
|
319
|
-
version = "1.
|
|
307
|
+
version = "1.11.1"
|
|
320
308
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
321
|
-
checksum = "
|
|
309
|
+
checksum = "b544ef1b4eac5dc2db33ea63606ae9ffcfac26c1416a2806ae0bf5f56b201191"
|
|
322
310
|
dependencies = [
|
|
323
311
|
"aho-corasick",
|
|
324
312
|
"memchr",
|
|
313
|
+
"regex-automata",
|
|
325
314
|
"regex-syntax",
|
|
326
315
|
]
|
|
327
316
|
|
|
328
317
|
[[package]]
|
|
329
318
|
name = "regex-automata"
|
|
330
|
-
version = "0.
|
|
319
|
+
version = "0.4.9"
|
|
331
320
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
332
|
-
checksum = "
|
|
321
|
+
checksum = "809e8dc61f6de73b46c85f4c96486310fe304c434cfa43669d7b40f711150908"
|
|
322
|
+
dependencies = [
|
|
323
|
+
"aho-corasick",
|
|
324
|
+
"memchr",
|
|
325
|
+
"regex-syntax",
|
|
326
|
+
]
|
|
333
327
|
|
|
334
328
|
[[package]]
|
|
335
329
|
name = "regex-syntax"
|
|
336
|
-
version = "0.
|
|
330
|
+
version = "0.8.5"
|
|
337
331
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
338
|
-
checksum = "
|
|
332
|
+
checksum = "2b15c43186be67a4fd63bee50d0303afffcef381492ebe2c5d87f324e1b8815c"
|
|
339
333
|
|
|
340
334
|
[[package]]
|
|
341
335
|
name = "rustc-hash"
|
|
@@ -345,9 +339,9 @@ checksum = "08d43f7aa6b08d49f382cde6a7982047c3426db949b1424bc4b7ec9ae12c6ce2"
|
|
|
345
339
|
|
|
346
340
|
[[package]]
|
|
347
341
|
name = "scopeguard"
|
|
348
|
-
version = "1.
|
|
342
|
+
version = "1.2.0"
|
|
349
343
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
350
|
-
checksum = "
|
|
344
|
+
checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49"
|
|
351
345
|
|
|
352
346
|
[[package]]
|
|
353
347
|
name = "seq-macro"
|
|
@@ -357,9 +351,23 @@ checksum = "a3f0bf26fd526d2a95683cd0f87bf103b8539e2ca1ef48ce002d67aad59aa0b4"
|
|
|
357
351
|
|
|
358
352
|
[[package]]
|
|
359
353
|
name = "serde"
|
|
360
|
-
version = "1.0.
|
|
354
|
+
version = "1.0.217"
|
|
361
355
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
362
|
-
checksum = "
|
|
356
|
+
checksum = "02fc4265df13d6fa1d00ecff087228cc0a2b5f3c0e87e258d8b94a156e984c70"
|
|
357
|
+
dependencies = [
|
|
358
|
+
"serde_derive",
|
|
359
|
+
]
|
|
360
|
+
|
|
361
|
+
[[package]]
|
|
362
|
+
name = "serde_derive"
|
|
363
|
+
version = "1.0.217"
|
|
364
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
365
|
+
checksum = "5a9bf7cf98d04a2b28aead066b7496853d4779c9cc183c440dbac457641e19a0"
|
|
366
|
+
dependencies = [
|
|
367
|
+
"proc-macro2",
|
|
368
|
+
"quote",
|
|
369
|
+
"syn",
|
|
370
|
+
]
|
|
363
371
|
|
|
364
372
|
[[package]]
|
|
365
373
|
name = "shell-words"
|
|
@@ -369,21 +377,21 @@ checksum = "24188a676b6ae68c3b2cb3a01be17fbf7240ce009799bb56d5b1409051e78fde"
|
|
|
369
377
|
|
|
370
378
|
[[package]]
|
|
371
379
|
name = "shlex"
|
|
372
|
-
version = "1.
|
|
380
|
+
version = "1.3.0"
|
|
373
381
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
374
|
-
checksum = "
|
|
382
|
+
checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64"
|
|
375
383
|
|
|
376
384
|
[[package]]
|
|
377
385
|
name = "smallvec"
|
|
378
|
-
version = "1.
|
|
386
|
+
version = "1.13.2"
|
|
379
387
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
380
|
-
checksum = "
|
|
388
|
+
checksum = "3c5e1a9a646d36c3599cd173a41282daf47c44583ad367b8e6837255952e5c67"
|
|
381
389
|
|
|
382
390
|
[[package]]
|
|
383
391
|
name = "syn"
|
|
384
|
-
version = "2.0.
|
|
392
|
+
version = "2.0.93"
|
|
385
393
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
386
|
-
checksum = "
|
|
394
|
+
checksum = "9c786062daee0d6db1132800e623df74274a0a87322d8e183338e01b3d98d058"
|
|
387
395
|
dependencies = [
|
|
388
396
|
"proc-macro2",
|
|
389
397
|
"quote",
|
|
@@ -392,8 +400,9 @@ dependencies = [
|
|
|
392
400
|
|
|
393
401
|
[[package]]
|
|
394
402
|
name = "tiktoken-rs"
|
|
395
|
-
version = "0.
|
|
396
|
-
source = "
|
|
403
|
+
version = "0.6.0"
|
|
404
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
405
|
+
checksum = "44075987ee2486402f0808505dd65692163d243a337fc54363d49afac41087f6"
|
|
397
406
|
dependencies = [
|
|
398
407
|
"anyhow",
|
|
399
408
|
"base64",
|
|
@@ -401,6 +410,7 @@ dependencies = [
|
|
|
401
410
|
"fancy-regex",
|
|
402
411
|
"lazy_static",
|
|
403
412
|
"parking_lot",
|
|
413
|
+
"regex",
|
|
404
414
|
"rustc-hash",
|
|
405
415
|
]
|
|
406
416
|
|
|
@@ -415,50 +425,20 @@ dependencies = [
|
|
|
415
425
|
|
|
416
426
|
[[package]]
|
|
417
427
|
name = "unicode-ident"
|
|
418
|
-
version = "1.0.
|
|
419
|
-
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
420
|
-
checksum = "e5464a87b239f13a63a501f2701565754bae92d243d4bb7eb12f6d57d2269bf4"
|
|
421
|
-
|
|
422
|
-
[[package]]
|
|
423
|
-
name = "winapi"
|
|
424
|
-
version = "0.3.9"
|
|
428
|
+
version = "1.0.14"
|
|
425
429
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
426
|
-
checksum = "
|
|
427
|
-
dependencies = [
|
|
428
|
-
"winapi-i686-pc-windows-gnu",
|
|
429
|
-
"winapi-x86_64-pc-windows-gnu",
|
|
430
|
-
]
|
|
431
|
-
|
|
432
|
-
[[package]]
|
|
433
|
-
name = "winapi-i686-pc-windows-gnu"
|
|
434
|
-
version = "0.4.0"
|
|
435
|
-
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
436
|
-
checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6"
|
|
437
|
-
|
|
438
|
-
[[package]]
|
|
439
|
-
name = "winapi-x86_64-pc-windows-gnu"
|
|
440
|
-
version = "0.4.0"
|
|
441
|
-
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
442
|
-
checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f"
|
|
443
|
-
|
|
444
|
-
[[package]]
|
|
445
|
-
name = "windows-sys"
|
|
446
|
-
version = "0.45.0"
|
|
447
|
-
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
448
|
-
checksum = "75283be5efb2831d37ea142365f009c02ec203cd29a3ebecbc093d52315b66d0"
|
|
449
|
-
dependencies = [
|
|
450
|
-
"windows-targets",
|
|
451
|
-
]
|
|
430
|
+
checksum = "adb9e6ca4f869e1180728b7950e35922a7fc6397f7b641499e8f3ef06e50dc83"
|
|
452
431
|
|
|
453
432
|
[[package]]
|
|
454
433
|
name = "windows-targets"
|
|
455
|
-
version = "0.
|
|
434
|
+
version = "0.52.6"
|
|
456
435
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
457
|
-
checksum = "
|
|
436
|
+
checksum = "9b724f72796e036ab90c1021d4780d4d3d648aca59e491e6b98e725b84e99973"
|
|
458
437
|
dependencies = [
|
|
459
438
|
"windows_aarch64_gnullvm",
|
|
460
439
|
"windows_aarch64_msvc",
|
|
461
440
|
"windows_i686_gnu",
|
|
441
|
+
"windows_i686_gnullvm",
|
|
462
442
|
"windows_i686_msvc",
|
|
463
443
|
"windows_x86_64_gnu",
|
|
464
444
|
"windows_x86_64_gnullvm",
|
|
@@ -467,42 +447,48 @@ dependencies = [
|
|
|
467
447
|
|
|
468
448
|
[[package]]
|
|
469
449
|
name = "windows_aarch64_gnullvm"
|
|
470
|
-
version = "0.
|
|
450
|
+
version = "0.52.6"
|
|
471
451
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
472
|
-
checksum = "
|
|
452
|
+
checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3"
|
|
473
453
|
|
|
474
454
|
[[package]]
|
|
475
455
|
name = "windows_aarch64_msvc"
|
|
476
|
-
version = "0.
|
|
456
|
+
version = "0.52.6"
|
|
477
457
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
478
|
-
checksum = "
|
|
458
|
+
checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469"
|
|
479
459
|
|
|
480
460
|
[[package]]
|
|
481
461
|
name = "windows_i686_gnu"
|
|
482
|
-
version = "0.
|
|
462
|
+
version = "0.52.6"
|
|
463
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
464
|
+
checksum = "8e9b5ad5ab802e97eb8e295ac6720e509ee4c243f69d781394014ebfe8bbfa0b"
|
|
465
|
+
|
|
466
|
+
[[package]]
|
|
467
|
+
name = "windows_i686_gnullvm"
|
|
468
|
+
version = "0.52.6"
|
|
483
469
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
484
|
-
checksum = "
|
|
470
|
+
checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66"
|
|
485
471
|
|
|
486
472
|
[[package]]
|
|
487
473
|
name = "windows_i686_msvc"
|
|
488
|
-
version = "0.
|
|
474
|
+
version = "0.52.6"
|
|
489
475
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
490
|
-
checksum = "
|
|
476
|
+
checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66"
|
|
491
477
|
|
|
492
478
|
[[package]]
|
|
493
479
|
name = "windows_x86_64_gnu"
|
|
494
|
-
version = "0.
|
|
480
|
+
version = "0.52.6"
|
|
495
481
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
496
|
-
checksum = "
|
|
482
|
+
checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78"
|
|
497
483
|
|
|
498
484
|
[[package]]
|
|
499
485
|
name = "windows_x86_64_gnullvm"
|
|
500
|
-
version = "0.
|
|
486
|
+
version = "0.52.6"
|
|
501
487
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
502
|
-
checksum = "
|
|
488
|
+
checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d"
|
|
503
489
|
|
|
504
490
|
[[package]]
|
|
505
491
|
name = "windows_x86_64_msvc"
|
|
506
|
-
version = "0.
|
|
492
|
+
version = "0.52.6"
|
|
507
493
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
508
|
-
checksum = "
|
|
494
|
+
checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec"
|
data/Gemfile.lock
CHANGED
|
@@ -1,73 +1,73 @@
|
|
|
1
1
|
PATH
|
|
2
2
|
remote: .
|
|
3
3
|
specs:
|
|
4
|
-
tiktoken_ruby (0.0.
|
|
5
|
-
rb_sys (
|
|
4
|
+
tiktoken_ruby (0.0.11.1)
|
|
5
|
+
rb_sys (= 0.9.106)
|
|
6
6
|
|
|
7
7
|
GEM
|
|
8
8
|
remote: https://rubygems.org/
|
|
9
9
|
specs:
|
|
10
10
|
ast (2.4.2)
|
|
11
|
-
diff-lcs (1.5.
|
|
12
|
-
json (2.
|
|
11
|
+
diff-lcs (1.5.1)
|
|
12
|
+
json (2.9.1)
|
|
13
13
|
language_server-protocol (3.17.0.3)
|
|
14
14
|
lint_roller (1.1.0)
|
|
15
15
|
minitest (5.21.2)
|
|
16
|
-
parallel (1.
|
|
17
|
-
parser (3.3.0
|
|
16
|
+
parallel (1.26.3)
|
|
17
|
+
parser (3.3.6.0)
|
|
18
18
|
ast (~> 2.4.1)
|
|
19
19
|
racc
|
|
20
|
-
racc (1.
|
|
20
|
+
racc (1.8.1)
|
|
21
21
|
rainbow (3.1.1)
|
|
22
|
-
rake (13.1
|
|
23
|
-
rake-compiler (1.2.
|
|
22
|
+
rake (13.2.1)
|
|
23
|
+
rake-compiler (1.2.9)
|
|
24
24
|
rake
|
|
25
|
-
rb_sys (0.9.
|
|
26
|
-
regexp_parser (2.
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
rspec-
|
|
30
|
-
rspec-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
rspec-expectations (3.12.3)
|
|
25
|
+
rb_sys (0.9.106)
|
|
26
|
+
regexp_parser (2.10.0)
|
|
27
|
+
rspec (3.13.0)
|
|
28
|
+
rspec-core (~> 3.13.0)
|
|
29
|
+
rspec-expectations (~> 3.13.0)
|
|
30
|
+
rspec-mocks (~> 3.13.0)
|
|
31
|
+
rspec-core (3.13.2)
|
|
32
|
+
rspec-support (~> 3.13.0)
|
|
33
|
+
rspec-expectations (3.13.3)
|
|
35
34
|
diff-lcs (>= 1.2.0, < 2.0)
|
|
36
|
-
rspec-support (~> 3.
|
|
37
|
-
rspec-mocks (3.
|
|
35
|
+
rspec-support (~> 3.13.0)
|
|
36
|
+
rspec-mocks (3.13.2)
|
|
38
37
|
diff-lcs (>= 1.2.0, < 2.0)
|
|
39
|
-
rspec-support (~> 3.
|
|
40
|
-
rspec-support (3.
|
|
41
|
-
rubocop (1.
|
|
38
|
+
rspec-support (~> 3.13.0)
|
|
39
|
+
rspec-support (3.13.2)
|
|
40
|
+
rubocop (1.69.2)
|
|
42
41
|
json (~> 2.3)
|
|
43
42
|
language_server-protocol (>= 3.17.0)
|
|
44
43
|
parallel (~> 1.10)
|
|
45
|
-
parser (>= 3.
|
|
44
|
+
parser (>= 3.3.0.2)
|
|
46
45
|
rainbow (>= 2.2.2, < 4.0)
|
|
47
|
-
regexp_parser (>=
|
|
48
|
-
|
|
49
|
-
rubocop-ast (>= 1.30.0, < 2.0)
|
|
46
|
+
regexp_parser (>= 2.9.3, < 3.0)
|
|
47
|
+
rubocop-ast (>= 1.36.2, < 2.0)
|
|
50
48
|
ruby-progressbar (~> 1.7)
|
|
51
|
-
unicode-display_width (>= 2.4.0, <
|
|
52
|
-
rubocop-ast (1.
|
|
53
|
-
parser (>= 3.
|
|
54
|
-
rubocop-performance (1.
|
|
49
|
+
unicode-display_width (>= 2.4.0, < 4.0)
|
|
50
|
+
rubocop-ast (1.37.0)
|
|
51
|
+
parser (>= 3.3.1.0)
|
|
52
|
+
rubocop-performance (1.23.0)
|
|
55
53
|
rubocop (>= 1.48.1, < 2.0)
|
|
56
|
-
rubocop-ast (>= 1.
|
|
54
|
+
rubocop-ast (>= 1.31.1, < 2.0)
|
|
57
55
|
ruby-progressbar (1.13.0)
|
|
58
|
-
standard (1.
|
|
56
|
+
standard (1.43.0)
|
|
59
57
|
language_server-protocol (~> 3.17.0.2)
|
|
60
58
|
lint_roller (~> 1.0)
|
|
61
|
-
rubocop (~> 1.
|
|
59
|
+
rubocop (~> 1.69.1)
|
|
62
60
|
standard-custom (~> 1.0.0)
|
|
63
|
-
standard-performance (~> 1.
|
|
61
|
+
standard-performance (~> 1.6)
|
|
64
62
|
standard-custom (1.0.2)
|
|
65
63
|
lint_roller (~> 1.0)
|
|
66
64
|
rubocop (~> 1.50)
|
|
67
|
-
standard-performance (1.
|
|
65
|
+
standard-performance (1.6.0)
|
|
68
66
|
lint_roller (~> 1.1)
|
|
69
|
-
rubocop-performance (~> 1.
|
|
70
|
-
unicode-display_width (
|
|
67
|
+
rubocop-performance (~> 1.23.0)
|
|
68
|
+
unicode-display_width (3.1.3)
|
|
69
|
+
unicode-emoji (~> 4.0, >= 4.0.4)
|
|
70
|
+
unicode-emoji (4.0.4)
|
|
71
71
|
yard (0.9.34)
|
|
72
72
|
yard-doctest (0.1.17)
|
|
73
73
|
minitest
|
|
@@ -89,4 +89,4 @@ DEPENDENCIES
|
|
|
89
89
|
yard-doctest
|
|
90
90
|
|
|
91
91
|
BUNDLED WITH
|
|
92
|
-
2.
|
|
92
|
+
2.5.18
|
data/Rakefile
CHANGED
|
@@ -21,3 +21,9 @@ end
|
|
|
21
21
|
task build: :compile
|
|
22
22
|
|
|
23
23
|
task default: %i[compile spec standard]
|
|
24
|
+
|
|
25
|
+
# Packaging default (non-precompiled) gem
|
|
26
|
+
require "rubygems/package_task"
|
|
27
|
+
gem_path = Gem::PackageTask.new(GEMSPEC).define
|
|
28
|
+
desc "Package the Ruby gem"
|
|
29
|
+
task "package" => [gem_path]
|
|
@@ -10,6 +10,6 @@ publish = false
|
|
|
10
10
|
crate-type = ["cdylib"]
|
|
11
11
|
|
|
12
12
|
[dependencies]
|
|
13
|
-
magnus = { version = "0.
|
|
14
|
-
rb-sys = { version = "
|
|
15
|
-
tiktoken-rs = {
|
|
13
|
+
magnus = { version = "0.7.1" }
|
|
14
|
+
rb-sys = { version = "0.9.106", features = ["stable-api-compiled-fallback"] }
|
|
15
|
+
tiktoken-rs = { version = "0.6.0" }
|
|
@@ -1,7 +1,8 @@
|
|
|
1
1
|
use std::collections::HashSet;
|
|
2
2
|
|
|
3
|
-
use
|
|
3
|
+
use tiktoken_rs::Rank;
|
|
4
4
|
|
|
5
|
+
use crate::uncicode_error;
|
|
5
6
|
|
|
6
7
|
#[magnus::wrap(class = "Tiktoken::Ext::CoreBPE")]
|
|
7
8
|
pub struct CoreBPEWrapper {
|
|
@@ -13,11 +14,15 @@ impl CoreBPEWrapper {
|
|
|
13
14
|
Self { core_bpe }
|
|
14
15
|
}
|
|
15
16
|
|
|
16
|
-
pub fn encode_ordinary(&self, text: String) -> Vec<
|
|
17
|
+
pub fn encode_ordinary(&self, text: String) -> Vec<Rank> {
|
|
17
18
|
self.core_bpe.encode_ordinary(text.as_str())
|
|
18
19
|
}
|
|
19
20
|
|
|
20
|
-
pub fn encode(
|
|
21
|
+
pub fn encode(
|
|
22
|
+
&self,
|
|
23
|
+
text: String,
|
|
24
|
+
allowed_special: magnus::RArray,
|
|
25
|
+
) -> Result<Vec<Rank>, magnus::Error> {
|
|
21
26
|
let allowed_special: Vec<String> = allowed_special.to_vec()?;
|
|
22
27
|
let allowed_special: Vec<&str> = allowed_special.iter().map(|s| s.as_str()).collect();
|
|
23
28
|
let allowed_special: HashSet<&str> = HashSet::from_iter(allowed_special.iter().cloned());
|
|
@@ -25,20 +30,18 @@ impl CoreBPEWrapper {
|
|
|
25
30
|
Ok(self.core_bpe.encode(text.as_str(), allowed_special))
|
|
26
31
|
}
|
|
27
32
|
|
|
28
|
-
pub fn encode_with_special_tokens(&self, text: String) -> Vec<
|
|
33
|
+
pub fn encode_with_special_tokens(&self, text: String) -> Vec<Rank> {
|
|
29
34
|
self.core_bpe.encode_with_special_tokens(text.as_str())
|
|
30
35
|
}
|
|
31
36
|
|
|
32
|
-
pub fn decode(&self, ids: Vec<
|
|
33
|
-
self.core_bpe.decode(ids)
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
};
|
|
39
|
-
|
|
40
|
-
magnus::Error::new(error, e.to_string())
|
|
41
|
-
})
|
|
37
|
+
pub fn decode(&self, ids: Vec<Rank>) -> Result<String, magnus::Error> {
|
|
38
|
+
self.core_bpe.decode(ids).map_err(|e| {
|
|
39
|
+
let error = match uncicode_error() {
|
|
40
|
+
Ok(error) => error,
|
|
41
|
+
Err(e) => return e,
|
|
42
|
+
};
|
|
42
43
|
|
|
44
|
+
magnus::Error::new(error, e.to_string())
|
|
45
|
+
})
|
|
43
46
|
}
|
|
44
47
|
}
|
|
@@ -20,6 +20,11 @@ fn cl100k_base() -> CoreBPEWrapper {
|
|
|
20
20
|
CoreBPEWrapper::new(core_bpe)
|
|
21
21
|
}
|
|
22
22
|
|
|
23
|
+
fn o200k_base() -> CoreBPEWrapper {
|
|
24
|
+
let core_bpe = tiktoken_rs::o200k_base().unwrap();
|
|
25
|
+
CoreBPEWrapper::new(core_bpe)
|
|
26
|
+
}
|
|
27
|
+
|
|
23
28
|
fn module() -> Result<RModule, magnus::Error> {
|
|
24
29
|
define_module("Tiktoken")
|
|
25
30
|
}
|
|
@@ -37,6 +42,7 @@ fn init() -> Result<(), Error> {
|
|
|
37
42
|
factory_module.define_singleton_method("p50k_base", function!(p50k_base, 0))?;
|
|
38
43
|
factory_module.define_singleton_method("p50k_edit", function!(p50k_edit, 0))?;
|
|
39
44
|
factory_module.define_singleton_method("cl100k_base", function!(cl100k_base, 0))?;
|
|
45
|
+
factory_module.define_singleton_method("o200k_base", function!(o200k_base, 0))?;
|
|
40
46
|
|
|
41
47
|
let ext_module = module.define_module("Ext")?;
|
|
42
48
|
let bpe_class = ext_module.define_class("CoreBPE", class::object())?;
|
data/lib/tiktoken_ruby.rb
CHANGED
|
@@ -64,13 +64,18 @@ module Tiktoken
|
|
|
64
64
|
:r50k_base,
|
|
65
65
|
:p50k_base,
|
|
66
66
|
:p50k_edit,
|
|
67
|
-
:cl100k_base
|
|
67
|
+
:cl100k_base,
|
|
68
|
+
:o200k_base
|
|
68
69
|
]
|
|
69
70
|
|
|
70
71
|
# taken from the python library here https://github.com/openai/tiktoken/blob/main/tiktoken/model.py
|
|
71
|
-
# that is also MIT licensed but by OpenAI
|
|
72
|
+
# that is also MIT licensed but by OpenAI;
|
|
73
|
+
# https://github.com/Congyuwang/tiktoken-rs/blob/main/tiktoken-rs/src/tokenizer.rs#L50
|
|
74
|
+
# is the source of the mapping for the Rust library
|
|
72
75
|
MODEL_TO_ENCODING_NAME = {
|
|
73
76
|
# chat
|
|
77
|
+
"chatgpt-4o-latest": "o200k_base",
|
|
78
|
+
"gpt-4o": "o200k_base",
|
|
74
79
|
"gpt-4": "cl100k_base",
|
|
75
80
|
"gpt-3.5-turbo": "cl100k_base",
|
|
76
81
|
"gpt-35-turbo": "cl100k_base", # Azure deployment name
|
|
@@ -120,6 +125,7 @@ module Tiktoken
|
|
|
120
125
|
|
|
121
126
|
MODEL_PREFIX_TO_ENCODING = {
|
|
122
127
|
# chat
|
|
128
|
+
"gpt-4o-": "o200k_base", # e.g., gpt-4o-2024-05-13, etc.
|
|
123
129
|
"gpt-4-": "cl100k_base", # e.g., gpt-4-0314, etc., plus gpt-4-32k
|
|
124
130
|
"gpt-3.5-turbo-": "cl100k_base", # e.g, gpt-3.5-turbo-0301, -0401, etc.
|
|
125
131
|
"gpt-35-turbo-": "cl100k_base", # Azure deployment name
|
data/script/release
ADDED
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
#!/usr/bin/env bash
|
|
2
|
+
|
|
3
|
+
set -e
|
|
4
|
+
|
|
5
|
+
if [ -z "${TIKTOKEN_PUBLISH_KEY}" ]; then
|
|
6
|
+
echo "Error: TIKTOKEN_PUBLISH_KEY is not set. This is the RubyGems API key to push the gem."
|
|
7
|
+
exit 1
|
|
8
|
+
fi
|
|
9
|
+
|
|
10
|
+
run_id=""
|
|
11
|
+
# Parse arguments
|
|
12
|
+
while [[ "$#" -gt 0 ]]; do
|
|
13
|
+
case $1 in
|
|
14
|
+
--run-id)
|
|
15
|
+
run_id="$2"
|
|
16
|
+
shift 2
|
|
17
|
+
;;
|
|
18
|
+
*)
|
|
19
|
+
echo "Unknown parameter passed: $1"
|
|
20
|
+
exit 1
|
|
21
|
+
;;
|
|
22
|
+
esac
|
|
23
|
+
done
|
|
24
|
+
|
|
25
|
+
if [ -z "${run_id}" ]; then
|
|
26
|
+
echo "Error: --run-id is not provided. Please provide the GitHub Action run id for the cross-compile workflow."
|
|
27
|
+
exit 1
|
|
28
|
+
fi
|
|
29
|
+
|
|
30
|
+
version=$(grep VERSION lib/tiktoken_ruby/version.rb | head -n 1 | cut -d'"' -f2)
|
|
31
|
+
echo "Building tiktoken_ruby v$version, using artifacts from run $run_id"
|
|
32
|
+
|
|
33
|
+
rm -rf pkg/cross-compiled
|
|
34
|
+
gh run download "$run_id" -D pkg/cross-compiled
|
|
35
|
+
|
|
36
|
+
for gem in pkg/cross-compiled/cross-gem-*/tiktoken_ruby-"$version"*.gem ; do
|
|
37
|
+
echo "Publishing $gem"
|
|
38
|
+
GEM_HOST_API_KEY="${TIKTOKEN_PUBLISH_KEY}" gem push "$gem" --host https://rubygems.org
|
|
39
|
+
done
|
|
40
|
+
|
|
41
|
+
# last but not least, the uncompiled gem
|
|
42
|
+
bundle exec rake package
|
|
43
|
+
GEM_HOST_API_KEY="${TIKTOKEN_PUBLISH_KEY}" gem push "pkg/tiktoken_ruby-$version.gem" --host https://rubygems.org
|
metadata
CHANGED
|
@@ -1,29 +1,29 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: tiktoken_ruby
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.0.8
|
|
4
|
+
version: 0.0.11.1
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- IAPark
|
|
8
8
|
autorequire:
|
|
9
9
|
bindir: exe
|
|
10
10
|
cert_chain: []
|
|
11
|
-
date:
|
|
11
|
+
date: 2025-01-08 00:00:00.000000000 Z
|
|
12
12
|
dependencies:
|
|
13
13
|
- !ruby/object:Gem::Dependency
|
|
14
14
|
name: rb_sys
|
|
15
15
|
requirement: !ruby/object:Gem::Requirement
|
|
16
16
|
requirements:
|
|
17
|
-
- - ">="
|
|
17
|
+
- - '='
|
|
18
18
|
- !ruby/object:Gem::Version
|
|
19
|
-
version: 0.9.86
|
|
19
|
+
version: 0.9.106
|
|
20
20
|
type: :runtime
|
|
21
21
|
prerelease: false
|
|
22
22
|
version_requirements: !ruby/object:Gem::Requirement
|
|
23
23
|
requirements:
|
|
24
|
-
- - ">="
|
|
24
|
+
- - '='
|
|
25
25
|
- !ruby/object:Gem::Version
|
|
26
|
-
version: 0.9.86
|
|
26
|
+
version: 0.9.106
|
|
27
27
|
description: An unofficial Ruby wrapper for Tiktoken, a BPE tokenizer written by and
|
|
28
28
|
used by OpenAI. It can be used to count the number of tokens in text before sending
|
|
29
29
|
it to OpenAI APIs.
|
|
@@ -36,6 +36,7 @@ extra_rdoc_files: []
|
|
|
36
36
|
files:
|
|
37
37
|
- ".rspec"
|
|
38
38
|
- ".standard.yml"
|
|
39
|
+
- ".vscode/settings.json"
|
|
39
40
|
- Cargo.lock
|
|
40
41
|
- Cargo.toml
|
|
41
42
|
- Gemfile
|
|
@@ -51,8 +52,8 @@ files:
|
|
|
51
52
|
- lib/tiktoken_ruby.rb
|
|
52
53
|
- lib/tiktoken_ruby/encoding.rb
|
|
53
54
|
- lib/tiktoken_ruby/version.rb
|
|
55
|
+
- script/release
|
|
54
56
|
- sig/tiktoken_ruby.rbs
|
|
55
|
-
- tiktoken_ruby.gemspec
|
|
56
57
|
homepage: https://github.com/IAPark/tiktoken_ruby
|
|
57
58
|
licenses:
|
|
58
59
|
- MIT
|
|
@@ -68,14 +69,14 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
|
68
69
|
requirements:
|
|
69
70
|
- - ">="
|
|
70
71
|
- !ruby/object:Gem::Version
|
|
71
|
-
version: 2.7.0
|
|
72
|
+
version: 3.1.0
|
|
72
73
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
|
73
74
|
requirements:
|
|
74
75
|
- - ">="
|
|
75
76
|
- !ruby/object:Gem::Version
|
|
76
|
-
version: 3.1.0
|
|
77
|
+
version: 3.4.0
|
|
77
78
|
requirements: []
|
|
78
|
-
rubygems_version: 3.
|
|
79
|
+
rubygems_version: 3.5.22
|
|
79
80
|
signing_key:
|
|
80
81
|
specification_version: 4
|
|
81
82
|
summary: Ruby wrapper for Tiktoken
|
data/tiktoken_ruby.gemspec
DELETED
|
@@ -1,33 +0,0 @@
|
|
|
1
|
-
# frozen_string_literal: true
|
|
2
|
-
|
|
3
|
-
require_relative "lib/tiktoken_ruby/version"
|
|
4
|
-
|
|
5
|
-
Gem::Specification.new do |spec|
|
|
6
|
-
spec.name = "tiktoken_ruby"
|
|
7
|
-
spec.version = Tiktoken::VERSION
|
|
8
|
-
spec.authors = ["IAPark"]
|
|
9
|
-
spec.email = ["isaac.a.park@gmail.com"]
|
|
10
|
-
spec.summary = "Ruby wrapper for Tiktoken"
|
|
11
|
-
spec.description = "An unofficial Ruby wrapper for Tiktoken, " \
|
|
12
|
-
"a BPE tokenizer written by and used by OpenAI. It can be used to " \
|
|
13
|
-
"count the number of tokens in text before sending it to OpenAI APIs."
|
|
14
|
-
spec.homepage = "https://github.com/IAPark/tiktoken_ruby"
|
|
15
|
-
spec.license = "MIT"
|
|
16
|
-
spec.required_ruby_version = ">= 2.7.0"
|
|
17
|
-
spec.required_rubygems_version = ">= 3.1.0"
|
|
18
|
-
spec.platform = Gem::Platform::RUBY
|
|
19
|
-
|
|
20
|
-
spec.metadata["homepage_uri"] = spec.homepage
|
|
21
|
-
spec.metadata["source_code_uri"] = "https://github.com/IAPark/tiktoken_ruby"
|
|
22
|
-
spec.metadata["documentation_uri"] = "https://rubydoc.info/github/IAPark/tiktoken_ruby/main"
|
|
23
|
-
spec.files = Dir.chdir(__dir__) do
|
|
24
|
-
`git ls-files -z`.split("\x0").reject do |f|
|
|
25
|
-
(f == __FILE__) || f.match(%r{\A(?:(?:bin|test|spec|features)/|\.(?:git|circleci)|appveyor)})
|
|
26
|
-
end
|
|
27
|
-
end
|
|
28
|
-
spec.bindir = "exe"
|
|
29
|
-
spec.executables = spec.files.grep(%r{\Aexe/}) { |f| File.basename(f) }
|
|
30
|
-
spec.require_paths = ["lib"]
|
|
31
|
-
spec.extensions = ["ext/tiktoken_ruby/extconf.rb"]
|
|
32
|
-
spec.add_dependency "rb_sys", ">= 0.9.86"
|
|
33
|
-
end
|