tiktoken_ruby 0.0.12 → 0.0.14.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.vscode/settings.json +3 -0
- data/CHANGELOG.md +21 -0
- data/Cargo.lock +50 -106
- data/Gemfile.lock +18 -18
- data/README.md +33 -0
- data/ext/tiktoken_ruby/Cargo.toml +2 -2
- data/ext/tiktoken_ruby/src/core_bpe_wrapper.rs +121 -7
- data/ext/tiktoken_ruby/src/lib.rs +10 -1
- data/lib/tiktoken_ruby/encoding.rb +7 -0
- data/lib/tiktoken_ruby/version.rb +1 -1
- data/lib/tiktoken_ruby.rb +3 -1
- metadata +3 -4
- data/script/release +0 -43
- data/tiktoken_ruby.gemspec +0 -33
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 71cce652b2f6a2ca962823d1947603a5224f305901cb4d8c822ca32b58192d47
|
|
4
|
+
data.tar.gz: d2cd0525f5f784a5904e1b7bc05cb3dc0c00f1f10561d52a99c6ef12b1351e89
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: fe7572bf7a82f77441335a273e90e6b4bce92be2b6fb6073c1409de635af8c3521b0181ba8d3422691f8d5242b6a3b08ec899fad465084bb7fc570d153e44b00
|
|
7
|
+
data.tar.gz: 4f95fa2c39ed53c1d40cb6928f294f1617b35042abd4b8cdcf641dac016b4dba9a15b3f46835cd85600af8a73152c5f3488694db9cdf9d3dd22707ab109a133d
|
data/.vscode/settings.json
CHANGED
data/CHANGELOG.md
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
# [v0.0.14.1] - 20-12-2025
|
|
2
|
+
## What's Changed
|
|
3
|
+
* Cut v0.0.12 by @gjtorikian in https://github.com/IAPark/tiktoken_ruby/pull/78
|
|
4
|
+
* Bump magnus from 0.8.0 to 0.8.1 in the cargo group by @dependabot[bot] in https://github.com/IAPark/tiktoken_ruby/pull/83
|
|
5
|
+
* Bump actions/checkout from 4 to 5 in the github-actions group by @dependabot[bot] in https://github.com/IAPark/tiktoken_ruby/pull/82
|
|
6
|
+
* Bump standard from 1.50.0 to 1.51.1 in the bundler-dependencies group by @dependabot[bot] in https://github.com/IAPark/tiktoken_ruby/pull/81
|
|
7
|
+
* Bump actions/upload-artifact from 4 to 5 in the github-actions group by @dependabot[bot] in https://github.com/IAPark/tiktoken_ruby/pull/84
|
|
8
|
+
* Bump magnus from 0.8.1 to 0.8.2 in the cargo group by @dependabot[bot] in https://github.com/IAPark/tiktoken_ruby/pull/85
|
|
9
|
+
* Bump the bundler-dependencies group with 2 updates by @dependabot[bot] in https://github.com/IAPark/tiktoken_ruby/pull/86
|
|
10
|
+
* Support by @gjtorikian in https://github.com/IAPark/tiktoken_ruby/pull/87
|
|
11
|
+
* Bump actions/checkout from 5 to 6 in the github-actions group by @dependabot[bot] in https://github.com/IAPark/tiktoken_ruby/pull/89
|
|
12
|
+
* Bump standard from 1.51.1 to 1.52.0 in the bundler-dependencies group by @dependabot[bot] in https://github.com/IAPark/tiktoken_ruby/pull/88
|
|
13
|
+
* release GVL while encoding / decoding tokens by @tenderworks in https://github.com/IAPark/tiktoken_ruby/pull/90
|
|
14
|
+
* Drop Ruby 3.1 support; automate release process by @gjtorikian in https://github.com/IAPark/tiktoken_ruby/pull/92
|
|
15
|
+
* Rewrite history by @gjtorikian in https://github.com/IAPark/tiktoken_ruby/pull/93
|
|
16
|
+
* Force workflow rebuild by @gjtorikian in https://github.com/IAPark/tiktoken_ruby/pull/94
|
|
17
|
+
|
|
18
|
+
## New Contributors
|
|
19
|
+
* @tenderworks made their first contribution in https://github.com/IAPark/tiktoken_ruby/pull/90
|
|
20
|
+
|
|
21
|
+
**Full Changelog**: https://github.com/IAPark/tiktoken_ruby/compare/v0.0.12...v0.0.14.1
|
data/Cargo.lock
CHANGED
|
@@ -4,18 +4,18 @@ version = 4
|
|
|
4
4
|
|
|
5
5
|
[[package]]
|
|
6
6
|
name = "aho-corasick"
|
|
7
|
-
version = "1.1.
|
|
7
|
+
version = "1.1.4"
|
|
8
8
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
9
|
-
checksum = "
|
|
9
|
+
checksum = "ddd31a130427c27518df266943a5308ed92d4b226cc639f5a8f1002816174301"
|
|
10
10
|
dependencies = [
|
|
11
11
|
"memchr",
|
|
12
12
|
]
|
|
13
13
|
|
|
14
14
|
[[package]]
|
|
15
15
|
name = "anyhow"
|
|
16
|
-
version = "1.0.
|
|
16
|
+
version = "1.0.100"
|
|
17
17
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
18
|
-
checksum = "
|
|
18
|
+
checksum = "a23eb6b1614318a8071c9b2521f36b424b2c83db5eb3a0fead4a6c0809af6e61"
|
|
19
19
|
|
|
20
20
|
[[package]]
|
|
21
21
|
name = "base64"
|
|
@@ -60,15 +60,15 @@ checksum = "349f9b6a179ed607305526ca489b34ad0a41aed5f7980fa90eb03160b69598fb"
|
|
|
60
60
|
|
|
61
61
|
[[package]]
|
|
62
62
|
name = "bitflags"
|
|
63
|
-
version = "2.
|
|
63
|
+
version = "2.10.0"
|
|
64
64
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
65
|
-
checksum = "
|
|
65
|
+
checksum = "812e12b5285cc515a9c72a5c1d3b6d46a19dac5acfef5265968c166106e31dd3"
|
|
66
66
|
|
|
67
67
|
[[package]]
|
|
68
68
|
name = "bstr"
|
|
69
|
-
version = "1.12.
|
|
69
|
+
version = "1.12.1"
|
|
70
70
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
71
|
-
checksum = "
|
|
71
|
+
checksum = "63044e1ae8e69f3b5a92c736ca6269b8d12fa7efe39bf34ddb06d102cf0e2cab"
|
|
72
72
|
dependencies = [
|
|
73
73
|
"memchr",
|
|
74
74
|
"regex-automata",
|
|
@@ -86,9 +86,9 @@ dependencies = [
|
|
|
86
86
|
|
|
87
87
|
[[package]]
|
|
88
88
|
name = "cfg-if"
|
|
89
|
-
version = "1.0.
|
|
89
|
+
version = "1.0.4"
|
|
90
90
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
91
|
-
checksum = "
|
|
91
|
+
checksum = "9330f8b2ff13f34540b44e946ef35111825727b38d33286ef986142615121801"
|
|
92
92
|
|
|
93
93
|
[[package]]
|
|
94
94
|
name = "clang-sys"
|
|
@@ -147,25 +147,25 @@ checksum = "830d08ce1d1d941e6b30645f1a0eb5643013d835ce3779a5fc208261dbe10f55"
|
|
|
147
147
|
|
|
148
148
|
[[package]]
|
|
149
149
|
name = "libc"
|
|
150
|
-
version = "0.2.
|
|
150
|
+
version = "0.2.177"
|
|
151
151
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
152
|
-
checksum = "
|
|
152
|
+
checksum = "2874a2af47a2325c2001a6e6fad9b16a53b802102b528163885171cf92b15976"
|
|
153
153
|
|
|
154
154
|
[[package]]
|
|
155
155
|
name = "libloading"
|
|
156
|
-
version = "0.8.
|
|
156
|
+
version = "0.8.9"
|
|
157
157
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
158
|
-
checksum = "
|
|
158
|
+
checksum = "d7c4b02199fee7c5d21a5ae7d8cfa79a6ef5bb2fc834d6e9058e89c825efdc55"
|
|
159
159
|
dependencies = [
|
|
160
160
|
"cfg-if",
|
|
161
|
-
"windows-
|
|
161
|
+
"windows-link",
|
|
162
162
|
]
|
|
163
163
|
|
|
164
164
|
[[package]]
|
|
165
165
|
name = "magnus"
|
|
166
|
-
version = "0.8.
|
|
166
|
+
version = "0.8.2"
|
|
167
167
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
168
|
-
checksum = "
|
|
168
|
+
checksum = "3b36a5b126bbe97eb0d02d07acfeb327036c6319fd816139a49824a83b7f9012"
|
|
169
169
|
dependencies = [
|
|
170
170
|
"magnus-macros",
|
|
171
171
|
"rb-sys",
|
|
@@ -186,9 +186,9 @@ dependencies = [
|
|
|
186
186
|
|
|
187
187
|
[[package]]
|
|
188
188
|
name = "memchr"
|
|
189
|
-
version = "2.7.
|
|
189
|
+
version = "2.7.6"
|
|
190
190
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
191
|
-
checksum = "
|
|
191
|
+
checksum = "f52b00d39961fc5b2736ea853c9cc86238e165017a493d1d5c8eac6bdc4cc273"
|
|
192
192
|
|
|
193
193
|
[[package]]
|
|
194
194
|
name = "minimal-lexical"
|
|
@@ -208,18 +208,18 @@ dependencies = [
|
|
|
208
208
|
|
|
209
209
|
[[package]]
|
|
210
210
|
name = "proc-macro2"
|
|
211
|
-
version = "1.0.
|
|
211
|
+
version = "1.0.103"
|
|
212
212
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
213
|
-
checksum = "
|
|
213
|
+
checksum = "5ee95bc4ef87b8d5ba32e8b7714ccc834865276eab0aed5c9958d00ec45f49e8"
|
|
214
214
|
dependencies = [
|
|
215
215
|
"unicode-ident",
|
|
216
216
|
]
|
|
217
217
|
|
|
218
218
|
[[package]]
|
|
219
219
|
name = "quote"
|
|
220
|
-
version = "1.0.
|
|
220
|
+
version = "1.0.42"
|
|
221
221
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
222
|
-
checksum = "
|
|
222
|
+
checksum = "a338cc41d27e6cc6dce6cefc13a0729dfbb81c262b1f519331575dd80ef3067f"
|
|
223
223
|
dependencies = [
|
|
224
224
|
"proc-macro2",
|
|
225
225
|
]
|
|
@@ -256,9 +256,9 @@ checksum = "08f8d2924cf136a1315e2b4c7460a39f62ef11ee5d522df9b2750fab55b868b6"
|
|
|
256
256
|
|
|
257
257
|
[[package]]
|
|
258
258
|
name = "regex"
|
|
259
|
-
version = "1.
|
|
259
|
+
version = "1.12.2"
|
|
260
260
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
261
|
-
checksum = "
|
|
261
|
+
checksum = "843bc0191f75f3e22651ae5f1e72939ab2f72a4bc30fa80a066bd66edefc24d4"
|
|
262
262
|
dependencies = [
|
|
263
263
|
"aho-corasick",
|
|
264
264
|
"memchr",
|
|
@@ -268,9 +268,9 @@ dependencies = [
|
|
|
268
268
|
|
|
269
269
|
[[package]]
|
|
270
270
|
name = "regex-automata"
|
|
271
|
-
version = "0.4.
|
|
271
|
+
version = "0.4.13"
|
|
272
272
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
273
|
-
checksum = "
|
|
273
|
+
checksum = "5276caf25ac86c8d810222b3dbb938e512c55c6831a10f3e6ed1c93b84041f1c"
|
|
274
274
|
dependencies = [
|
|
275
275
|
"aho-corasick",
|
|
276
276
|
"memchr",
|
|
@@ -279,9 +279,9 @@ dependencies = [
|
|
|
279
279
|
|
|
280
280
|
[[package]]
|
|
281
281
|
name = "regex-syntax"
|
|
282
|
-
version = "0.8.
|
|
282
|
+
version = "0.8.8"
|
|
283
283
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
284
|
-
checksum = "
|
|
284
|
+
checksum = "7a2d987857b319362043e95f5353c0535c1f58eec5336fdfcf626430af7def58"
|
|
285
285
|
|
|
286
286
|
[[package]]
|
|
287
287
|
name = "rustc-hash"
|
|
@@ -297,18 +297,27 @@ checksum = "1bc711410fbe7399f390ca1c3b60ad0f53f80e95c5eb935e52268a0e2cd49acc"
|
|
|
297
297
|
|
|
298
298
|
[[package]]
|
|
299
299
|
name = "serde"
|
|
300
|
-
version = "1.0.
|
|
300
|
+
version = "1.0.228"
|
|
301
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
302
|
+
checksum = "9a8e94ea7f378bd32cbbd37198a4a91436180c5bb472411e48b5ec2e2124ae9e"
|
|
303
|
+
dependencies = [
|
|
304
|
+
"serde_core",
|
|
305
|
+
]
|
|
306
|
+
|
|
307
|
+
[[package]]
|
|
308
|
+
name = "serde_core"
|
|
309
|
+
version = "1.0.228"
|
|
301
310
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
302
|
-
checksum = "
|
|
311
|
+
checksum = "41d385c7d4ca58e59fc732af25c3983b67ac852c1a25000afe1175de458b67ad"
|
|
303
312
|
dependencies = [
|
|
304
313
|
"serde_derive",
|
|
305
314
|
]
|
|
306
315
|
|
|
307
316
|
[[package]]
|
|
308
317
|
name = "serde_derive"
|
|
309
|
-
version = "1.0.
|
|
318
|
+
version = "1.0.228"
|
|
310
319
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
311
|
-
checksum = "
|
|
320
|
+
checksum = "d540f220d3187173da220f885ab66608367b6574e925011a9353e4badda91d79"
|
|
312
321
|
dependencies = [
|
|
313
322
|
"proc-macro2",
|
|
314
323
|
"quote",
|
|
@@ -329,9 +338,9 @@ checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64"
|
|
|
329
338
|
|
|
330
339
|
[[package]]
|
|
331
340
|
name = "syn"
|
|
332
|
-
version = "2.0.
|
|
341
|
+
version = "2.0.110"
|
|
333
342
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
334
|
-
checksum = "
|
|
343
|
+
checksum = "a99801b5bd34ede4cf3fc688c5919368fea4e4814a4664359503e6015b280aea"
|
|
335
344
|
dependencies = [
|
|
336
345
|
"proc-macro2",
|
|
337
346
|
"quote",
|
|
@@ -340,9 +349,9 @@ dependencies = [
|
|
|
340
349
|
|
|
341
350
|
[[package]]
|
|
342
351
|
name = "tiktoken-rs"
|
|
343
|
-
version = "0.
|
|
352
|
+
version = "0.9.1"
|
|
344
353
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
345
|
-
checksum = "
|
|
354
|
+
checksum = "3a19830747d9034cd9da43a60eaa8e552dfda7712424aebf187b7a60126bae0d"
|
|
346
355
|
dependencies = [
|
|
347
356
|
"anyhow",
|
|
348
357
|
"base64",
|
|
@@ -364,77 +373,12 @@ dependencies = [
|
|
|
364
373
|
|
|
365
374
|
[[package]]
|
|
366
375
|
name = "unicode-ident"
|
|
367
|
-
version = "1.0.
|
|
376
|
+
version = "1.0.22"
|
|
368
377
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
369
|
-
checksum = "
|
|
378
|
+
checksum = "9312f7c4f6ff9069b165498234ce8be658059c6728633667c526e27dc2cf1df5"
|
|
370
379
|
|
|
371
380
|
[[package]]
|
|
372
381
|
name = "windows-link"
|
|
373
|
-
version = "0.1
|
|
374
|
-
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
375
|
-
checksum = "5e6ad25900d524eaabdbbb96d20b4311e1e7ae1699af4fb28c17ae66c80d798a"
|
|
376
|
-
|
|
377
|
-
[[package]]
|
|
378
|
-
name = "windows-targets"
|
|
379
|
-
version = "0.53.3"
|
|
380
|
-
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
381
|
-
checksum = "d5fe6031c4041849d7c496a8ded650796e7b6ecc19df1a431c1a363342e5dc91"
|
|
382
|
-
dependencies = [
|
|
383
|
-
"windows-link",
|
|
384
|
-
"windows_aarch64_gnullvm",
|
|
385
|
-
"windows_aarch64_msvc",
|
|
386
|
-
"windows_i686_gnu",
|
|
387
|
-
"windows_i686_gnullvm",
|
|
388
|
-
"windows_i686_msvc",
|
|
389
|
-
"windows_x86_64_gnu",
|
|
390
|
-
"windows_x86_64_gnullvm",
|
|
391
|
-
"windows_x86_64_msvc",
|
|
392
|
-
]
|
|
393
|
-
|
|
394
|
-
[[package]]
|
|
395
|
-
name = "windows_aarch64_gnullvm"
|
|
396
|
-
version = "0.53.0"
|
|
397
|
-
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
398
|
-
checksum = "86b8d5f90ddd19cb4a147a5fa63ca848db3df085e25fee3cc10b39b6eebae764"
|
|
399
|
-
|
|
400
|
-
[[package]]
|
|
401
|
-
name = "windows_aarch64_msvc"
|
|
402
|
-
version = "0.53.0"
|
|
403
|
-
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
404
|
-
checksum = "c7651a1f62a11b8cbd5e0d42526e55f2c99886c77e007179efff86c2b137e66c"
|
|
405
|
-
|
|
406
|
-
[[package]]
|
|
407
|
-
name = "windows_i686_gnu"
|
|
408
|
-
version = "0.53.0"
|
|
409
|
-
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
410
|
-
checksum = "c1dc67659d35f387f5f6c479dc4e28f1d4bb90ddd1a5d3da2e5d97b42d6272c3"
|
|
411
|
-
|
|
412
|
-
[[package]]
|
|
413
|
-
name = "windows_i686_gnullvm"
|
|
414
|
-
version = "0.53.0"
|
|
415
|
-
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
416
|
-
checksum = "9ce6ccbdedbf6d6354471319e781c0dfef054c81fbc7cf83f338a4296c0cae11"
|
|
417
|
-
|
|
418
|
-
[[package]]
|
|
419
|
-
name = "windows_i686_msvc"
|
|
420
|
-
version = "0.53.0"
|
|
421
|
-
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
422
|
-
checksum = "581fee95406bb13382d2f65cd4a908ca7b1e4c2f1917f143ba16efe98a589b5d"
|
|
423
|
-
|
|
424
|
-
[[package]]
|
|
425
|
-
name = "windows_x86_64_gnu"
|
|
426
|
-
version = "0.53.0"
|
|
427
|
-
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
428
|
-
checksum = "2e55b5ac9ea33f2fc1716d1742db15574fd6fc8dadc51caab1c16a3d3b4190ba"
|
|
429
|
-
|
|
430
|
-
[[package]]
|
|
431
|
-
name = "windows_x86_64_gnullvm"
|
|
432
|
-
version = "0.53.0"
|
|
433
|
-
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
434
|
-
checksum = "0a6e035dd0599267ce1ee132e51c27dd29437f63325753051e71dd9e42406c57"
|
|
435
|
-
|
|
436
|
-
[[package]]
|
|
437
|
-
name = "windows_x86_64_msvc"
|
|
438
|
-
version = "0.53.0"
|
|
382
|
+
version = "0.2.1"
|
|
439
383
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
440
|
-
checksum = "
|
|
384
|
+
checksum = "f0805222e57f7521d6a62e36fa9163bc891acd422f971defe97d64e70d0a4fe5"
|
data/Gemfile.lock
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
PATH
|
|
2
2
|
remote: .
|
|
3
3
|
specs:
|
|
4
|
-
tiktoken_ruby (0.0.
|
|
4
|
+
tiktoken_ruby (0.0.14.1)
|
|
5
5
|
rb_sys (~> 0.9)
|
|
6
6
|
|
|
7
7
|
GEM
|
|
@@ -9,38 +9,38 @@ GEM
|
|
|
9
9
|
specs:
|
|
10
10
|
ast (2.4.3)
|
|
11
11
|
diff-lcs (1.6.2)
|
|
12
|
-
json (2.
|
|
12
|
+
json (2.16.0)
|
|
13
13
|
language_server-protocol (3.17.0.5)
|
|
14
14
|
lint_roller (1.1.0)
|
|
15
15
|
minitest (5.25.5)
|
|
16
16
|
parallel (1.27.0)
|
|
17
|
-
parser (3.3.
|
|
17
|
+
parser (3.3.10.0)
|
|
18
18
|
ast (~> 2.4.1)
|
|
19
19
|
racc
|
|
20
|
-
prism (1.
|
|
20
|
+
prism (1.6.0)
|
|
21
21
|
racc (1.8.1)
|
|
22
22
|
rainbow (3.1.1)
|
|
23
|
-
rake (13.3.
|
|
23
|
+
rake (13.3.1)
|
|
24
24
|
rake-compiler (1.3.0)
|
|
25
25
|
rake
|
|
26
26
|
rake-compiler-dock (1.9.1)
|
|
27
27
|
rb_sys (0.9.117)
|
|
28
28
|
rake-compiler-dock (= 1.9.1)
|
|
29
|
-
regexp_parser (2.
|
|
30
|
-
rspec (3.13.
|
|
29
|
+
regexp_parser (2.11.3)
|
|
30
|
+
rspec (3.13.2)
|
|
31
31
|
rspec-core (~> 3.13.0)
|
|
32
32
|
rspec-expectations (~> 3.13.0)
|
|
33
33
|
rspec-mocks (~> 3.13.0)
|
|
34
|
-
rspec-core (3.13.
|
|
34
|
+
rspec-core (3.13.6)
|
|
35
35
|
rspec-support (~> 3.13.0)
|
|
36
36
|
rspec-expectations (3.13.5)
|
|
37
37
|
diff-lcs (>= 1.2.0, < 2.0)
|
|
38
38
|
rspec-support (~> 3.13.0)
|
|
39
|
-
rspec-mocks (3.13.
|
|
39
|
+
rspec-mocks (3.13.7)
|
|
40
40
|
diff-lcs (>= 1.2.0, < 2.0)
|
|
41
41
|
rspec-support (~> 3.13.0)
|
|
42
|
-
rspec-support (3.13.
|
|
43
|
-
rubocop (1.
|
|
42
|
+
rspec-support (3.13.6)
|
|
43
|
+
rubocop (1.81.7)
|
|
44
44
|
json (~> 2.3)
|
|
45
45
|
language_server-protocol (~> 3.17.0.2)
|
|
46
46
|
lint_roller (~> 1.1.0)
|
|
@@ -48,10 +48,10 @@ GEM
|
|
|
48
48
|
parser (>= 3.3.0.2)
|
|
49
49
|
rainbow (>= 2.2.2, < 4.0)
|
|
50
50
|
regexp_parser (>= 2.9.3, < 3.0)
|
|
51
|
-
rubocop-ast (>= 1.
|
|
51
|
+
rubocop-ast (>= 1.47.1, < 2.0)
|
|
52
52
|
ruby-progressbar (~> 1.7)
|
|
53
53
|
unicode-display_width (>= 2.4.0, < 4.0)
|
|
54
|
-
rubocop-ast (1.
|
|
54
|
+
rubocop-ast (1.48.0)
|
|
55
55
|
parser (>= 3.3.7.2)
|
|
56
56
|
prism (~> 1.4)
|
|
57
57
|
rubocop-performance (1.25.0)
|
|
@@ -59,10 +59,10 @@ GEM
|
|
|
59
59
|
rubocop (>= 1.75.0, < 2.0)
|
|
60
60
|
rubocop-ast (>= 1.38.0, < 2.0)
|
|
61
61
|
ruby-progressbar (1.13.0)
|
|
62
|
-
standard (1.
|
|
62
|
+
standard (1.52.0)
|
|
63
63
|
language_server-protocol (~> 3.17.0.2)
|
|
64
64
|
lint_roller (~> 1.0)
|
|
65
|
-
rubocop (~> 1.
|
|
65
|
+
rubocop (~> 1.81.7)
|
|
66
66
|
standard-custom (~> 1.0.0)
|
|
67
67
|
standard-performance (~> 1.8)
|
|
68
68
|
standard-custom (1.0.2)
|
|
@@ -71,9 +71,9 @@ GEM
|
|
|
71
71
|
standard-performance (1.8.0)
|
|
72
72
|
lint_roller (~> 1.1)
|
|
73
73
|
rubocop-performance (~> 1.25.0)
|
|
74
|
-
unicode-display_width (3.
|
|
75
|
-
unicode-emoji (~> 4.
|
|
76
|
-
unicode-emoji (4.0
|
|
74
|
+
unicode-display_width (3.2.0)
|
|
75
|
+
unicode-emoji (~> 4.1)
|
|
76
|
+
unicode-emoji (4.1.0)
|
|
77
77
|
yard (0.9.37)
|
|
78
78
|
yard-doctest (0.1.17)
|
|
79
79
|
minitest
|
data/README.md
CHANGED
|
@@ -36,6 +36,39 @@ enc = Tiktoken.encoding_for_model("gpt-4")
|
|
|
36
36
|
enc.encode("hello world").length #=> 2
|
|
37
37
|
```
|
|
38
38
|
|
|
39
|
+
### Encoding methods
|
|
40
|
+
|
|
41
|
+
There are three methods for encoding text:
|
|
42
|
+
|
|
43
|
+
- `encode_ordinary(text)` - Encodes text, always treating special tokens as ordinary text
|
|
44
|
+
- `encode(text, allowed_special: [])` - Encodes text, treating special tokens as text unless listed in `allowed_special`
|
|
45
|
+
- `encode_with_special_tokens(text)` - Encodes text, recognizing and parsing all special tokens
|
|
46
|
+
|
|
47
|
+
**Special tokens** are control sequences used by OpenAI models, such as `<|endoftext|>`, `<|fim_prefix|>`, `<|fim_middle|>`, and `<|fim_suffix|>`. The encoding methods differ in how they handle these sequences:
|
|
48
|
+
|
|
49
|
+
```ruby
|
|
50
|
+
enc = Tiktoken.get_encoding("cl100k_base")
|
|
51
|
+
text = "Hello<|endoftext|>World"
|
|
52
|
+
|
|
53
|
+
# encode_ordinary: treats <|endoftext|> as literal characters (9 tokens)
|
|
54
|
+
enc.encode_ordinary(text)
|
|
55
|
+
#=> [9906, 27, 91, 8862, 728, 428, 91, 29, 10343]
|
|
56
|
+
|
|
57
|
+
# encode: same as encode_ordinary by default
|
|
58
|
+
enc.encode(text)
|
|
59
|
+
#=> [9906, 27, 91, 8862, 728, 428, 91, 29, 10343]
|
|
60
|
+
|
|
61
|
+
# encode with allowed_special: recognizes the specified special token (3 tokens)
|
|
62
|
+
enc.encode(text, allowed_special: ["<|endoftext|>"])
|
|
63
|
+
#=> [9906, 100257, 10343]
|
|
64
|
+
|
|
65
|
+
# encode_with_special_tokens: recognizes ALL special tokens (3 tokens)
|
|
66
|
+
enc.encode_with_special_tokens(text)
|
|
67
|
+
#=> [9906, 100257, 10343]
|
|
68
|
+
```
|
|
69
|
+
|
|
70
|
+
All methods round-trip correctly through `decode`.
|
|
71
|
+
|
|
39
72
|
## Development
|
|
40
73
|
|
|
41
74
|
After checking out the repo, run `bin/setup` to install dependencies. Then, run `rake spec` to run the tests. You can also run `bin/console` for an interactive prompt that will allow you to experiment.
|
|
@@ -10,6 +10,6 @@ publish = false
|
|
|
10
10
|
crate-type = ["cdylib"]
|
|
11
11
|
|
|
12
12
|
[dependencies]
|
|
13
|
-
magnus = { version = "0.8.
|
|
13
|
+
magnus = { version = "0.8.2" }
|
|
14
14
|
rb-sys = { version = "0.9.117", features = ["stable-api-compiled-fallback"] }
|
|
15
|
-
tiktoken-rs = { version = "0.
|
|
15
|
+
tiktoken-rs = { version = "0.9.0" }
|
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
use std::collections::HashSet;
|
|
2
|
+
use std::ffi::c_void;
|
|
2
3
|
|
|
3
4
|
use tiktoken_rs::Rank;
|
|
4
5
|
|
|
@@ -9,13 +10,82 @@ pub struct CoreBPEWrapper {
|
|
|
9
10
|
core_bpe: tiktoken_rs::CoreBPE,
|
|
10
11
|
}
|
|
11
12
|
|
|
13
|
+
struct EncodeOrdinaryData {
|
|
14
|
+
core_bpe: *const tiktoken_rs::CoreBPE,
|
|
15
|
+
text: String,
|
|
16
|
+
result: Vec<Rank>,
|
|
17
|
+
}
|
|
18
|
+
|
|
19
|
+
struct EncodeData {
|
|
20
|
+
core_bpe: *const tiktoken_rs::CoreBPE,
|
|
21
|
+
text: String,
|
|
22
|
+
allowed_special: HashSet<String>,
|
|
23
|
+
result: Vec<Rank>,
|
|
24
|
+
}
|
|
25
|
+
|
|
26
|
+
struct EncodeSpecialData {
|
|
27
|
+
core_bpe: *const tiktoken_rs::CoreBPE,
|
|
28
|
+
text: String,
|
|
29
|
+
result: Vec<Rank>,
|
|
30
|
+
}
|
|
31
|
+
|
|
32
|
+
struct DecodeData {
|
|
33
|
+
core_bpe: *const tiktoken_rs::CoreBPE,
|
|
34
|
+
ids: Vec<Rank>,
|
|
35
|
+
result: Result<String, String>,
|
|
36
|
+
}
|
|
37
|
+
|
|
38
|
+
unsafe extern "C" fn encode_ordinary_without_gvl(data: *mut c_void) -> *mut c_void {
|
|
39
|
+
let data = &mut *(data as *mut EncodeOrdinaryData);
|
|
40
|
+
let core_bpe = &*data.core_bpe;
|
|
41
|
+
data.result = core_bpe.encode_ordinary(&data.text);
|
|
42
|
+
std::ptr::null_mut()
|
|
43
|
+
}
|
|
44
|
+
|
|
45
|
+
unsafe extern "C" fn encode_without_gvl(data: *mut c_void) -> *mut c_void {
|
|
46
|
+
let data = &mut *(data as *mut EncodeData);
|
|
47
|
+
let core_bpe = &*data.core_bpe;
|
|
48
|
+
let allowed_special: HashSet<&str> = data.allowed_special.iter().map(|s| s.as_str()).collect();
|
|
49
|
+
data.result = core_bpe.encode(&data.text, &allowed_special).0;
|
|
50
|
+
std::ptr::null_mut()
|
|
51
|
+
}
|
|
52
|
+
|
|
53
|
+
unsafe extern "C" fn encode_special_without_gvl(data: *mut c_void) -> *mut c_void {
|
|
54
|
+
let data = &mut *(data as *mut EncodeSpecialData);
|
|
55
|
+
let core_bpe = &*data.core_bpe;
|
|
56
|
+
data.result = core_bpe.encode_with_special_tokens(&data.text);
|
|
57
|
+
std::ptr::null_mut()
|
|
58
|
+
}
|
|
59
|
+
|
|
60
|
+
unsafe extern "C" fn decode_without_gvl(data: *mut c_void) -> *mut c_void {
|
|
61
|
+
let data = &mut *(data as *mut DecodeData);
|
|
62
|
+
let core_bpe = &*data.core_bpe;
|
|
63
|
+
data.result = core_bpe.decode(std::mem::take(&mut data.ids)).map_err(|e| e.to_string());
|
|
64
|
+
std::ptr::null_mut()
|
|
65
|
+
}
|
|
66
|
+
|
|
12
67
|
impl CoreBPEWrapper {
|
|
13
68
|
pub fn new(core_bpe: tiktoken_rs::CoreBPE) -> Self {
|
|
14
69
|
Self { core_bpe }
|
|
15
70
|
}
|
|
16
71
|
|
|
17
72
|
pub fn encode_ordinary(&self, text: String) -> Vec<Rank> {
|
|
18
|
-
|
|
73
|
+
let mut data = EncodeOrdinaryData {
|
|
74
|
+
core_bpe: &self.core_bpe as *const _,
|
|
75
|
+
text,
|
|
76
|
+
result: Vec::new(),
|
|
77
|
+
};
|
|
78
|
+
|
|
79
|
+
unsafe {
|
|
80
|
+
rb_sys::rb_thread_call_without_gvl(
|
|
81
|
+
Some(encode_ordinary_without_gvl),
|
|
82
|
+
&mut data as *mut _ as *mut c_void,
|
|
83
|
+
None,
|
|
84
|
+
std::ptr::null_mut(),
|
|
85
|
+
);
|
|
86
|
+
}
|
|
87
|
+
|
|
88
|
+
data.result
|
|
19
89
|
}
|
|
20
90
|
|
|
21
91
|
pub fn encode(
|
|
@@ -24,24 +94,68 @@ impl CoreBPEWrapper {
|
|
|
24
94
|
allowed_special: magnus::RArray,
|
|
25
95
|
) -> Result<Vec<Rank>, magnus::Error> {
|
|
26
96
|
let allowed_special: Vec<String> = allowed_special.to_vec()?;
|
|
27
|
-
let allowed_special: Vec<&str> = allowed_special.iter().map(|s| s.as_str()).collect();
|
|
28
|
-
let allowed_special: HashSet<&str> = HashSet::from_iter(allowed_special.iter().cloned());
|
|
29
97
|
|
|
30
|
-
|
|
98
|
+
let mut data = EncodeData {
|
|
99
|
+
core_bpe: &self.core_bpe as *const _,
|
|
100
|
+
text,
|
|
101
|
+
allowed_special: HashSet::from_iter(allowed_special),
|
|
102
|
+
result: Vec::new(),
|
|
103
|
+
};
|
|
104
|
+
|
|
105
|
+
unsafe {
|
|
106
|
+
rb_sys::rb_thread_call_without_gvl(
|
|
107
|
+
Some(encode_without_gvl),
|
|
108
|
+
&mut data as *mut _ as *mut c_void,
|
|
109
|
+
None,
|
|
110
|
+
std::ptr::null_mut(),
|
|
111
|
+
);
|
|
112
|
+
}
|
|
113
|
+
|
|
114
|
+
Ok(data.result)
|
|
31
115
|
}
|
|
32
116
|
|
|
33
117
|
pub fn encode_with_special_tokens(&self, text: String) -> Vec<Rank> {
|
|
34
|
-
|
|
118
|
+
let mut data = EncodeSpecialData {
|
|
119
|
+
core_bpe: &self.core_bpe as *const _,
|
|
120
|
+
text,
|
|
121
|
+
result: Vec::new(),
|
|
122
|
+
};
|
|
123
|
+
|
|
124
|
+
unsafe {
|
|
125
|
+
rb_sys::rb_thread_call_without_gvl(
|
|
126
|
+
Some(encode_special_without_gvl),
|
|
127
|
+
&mut data as *mut _ as *mut c_void,
|
|
128
|
+
None,
|
|
129
|
+
std::ptr::null_mut(),
|
|
130
|
+
);
|
|
131
|
+
}
|
|
132
|
+
|
|
133
|
+
data.result
|
|
35
134
|
}
|
|
36
135
|
|
|
37
136
|
pub fn decode(&self, ids: Vec<Rank>) -> Result<String, magnus::Error> {
|
|
38
|
-
|
|
137
|
+
let mut data = DecodeData {
|
|
138
|
+
core_bpe: &self.core_bpe as *const _,
|
|
139
|
+
ids,
|
|
140
|
+
result: Err(String::new()),
|
|
141
|
+
};
|
|
142
|
+
|
|
143
|
+
unsafe {
|
|
144
|
+
rb_sys::rb_thread_call_without_gvl(
|
|
145
|
+
Some(decode_without_gvl),
|
|
146
|
+
&mut data as *mut _ as *mut c_void,
|
|
147
|
+
None,
|
|
148
|
+
std::ptr::null_mut(),
|
|
149
|
+
);
|
|
150
|
+
}
|
|
151
|
+
|
|
152
|
+
data.result.map_err(|e| {
|
|
39
153
|
let error = match uncicode_error() {
|
|
40
154
|
Ok(error) => error,
|
|
41
155
|
Err(e) => return e,
|
|
42
156
|
};
|
|
43
157
|
|
|
44
|
-
magnus::Error::new(error, e
|
|
158
|
+
magnus::Error::new(error, e)
|
|
45
159
|
})
|
|
46
160
|
}
|
|
47
161
|
}
|
|
@@ -25,12 +25,20 @@ fn o200k_base() -> CoreBPEWrapper {
|
|
|
25
25
|
CoreBPEWrapper::new(core_bpe)
|
|
26
26
|
}
|
|
27
27
|
|
|
28
|
+
fn o200k_harmony() -> CoreBPEWrapper {
|
|
29
|
+
let core_bpe = tiktoken_rs::o200k_harmony().unwrap();
|
|
30
|
+
CoreBPEWrapper::new(core_bpe)
|
|
31
|
+
}
|
|
32
|
+
|
|
28
33
|
fn module() -> Result<RModule, magnus::Error> {
|
|
29
34
|
Ruby::get().unwrap().define_module("Tiktoken")
|
|
30
35
|
}
|
|
31
36
|
|
|
32
37
|
fn uncicode_error() -> Result<ExceptionClass, magnus::Error> {
|
|
33
|
-
module()?.define_error(
|
|
38
|
+
module()?.define_error(
|
|
39
|
+
"UnicodeError",
|
|
40
|
+
Ruby::get().unwrap().exception_standard_error(),
|
|
41
|
+
)
|
|
34
42
|
}
|
|
35
43
|
|
|
36
44
|
#[magnus::init]
|
|
@@ -43,6 +51,7 @@ fn init() -> Result<(), Error> {
|
|
|
43
51
|
factory_module.define_singleton_method("p50k_edit", function!(p50k_edit, 0))?;
|
|
44
52
|
factory_module.define_singleton_method("cl100k_base", function!(cl100k_base, 0))?;
|
|
45
53
|
factory_module.define_singleton_method("o200k_base", function!(o200k_base, 0))?;
|
|
54
|
+
factory_module.define_singleton_method("o200k_harmony", function!(o200k_harmony, 0))?;
|
|
46
55
|
|
|
47
56
|
let ext_module = module.define_module("Ext")?;
|
|
48
57
|
let bpe_class = ext_module.define_class("CoreBPE", Ruby::get().unwrap().class_object())?;
|
|
@@ -40,6 +40,13 @@ class Tiktoken::Encoding
|
|
|
40
40
|
@ext_base_bpe.encode(text, allowed_special)
|
|
41
41
|
end
|
|
42
42
|
|
|
43
|
+
# Encodes the text as a list of integer tokens, including special tokens.
|
|
44
|
+
# @param text [String] The text to encode
|
|
45
|
+
# @return [Array<Integer>] The encoded tokens
|
|
46
|
+
def encode_with_special_tokens(text)
|
|
47
|
+
@ext_base_bpe.encode_with_special_tokens(text)
|
|
48
|
+
end
|
|
49
|
+
|
|
43
50
|
# Decodes the tokens back into text
|
|
44
51
|
# @param tokens [Array<Integer>] The tokens to decode
|
|
45
52
|
# @return [String] The decoded text
|
data/lib/tiktoken_ruby.rb
CHANGED
|
@@ -65,7 +65,8 @@ module Tiktoken
|
|
|
65
65
|
:p50k_base,
|
|
66
66
|
:p50k_edit,
|
|
67
67
|
:cl100k_base,
|
|
68
|
-
:o200k_base
|
|
68
|
+
:o200k_base,
|
|
69
|
+
:o200k_harmony
|
|
69
70
|
]
|
|
70
71
|
|
|
71
72
|
# taken from the python library here https://github.com/openai/tiktoken/blob/main/tiktoken/model.py
|
|
@@ -143,6 +144,7 @@ module Tiktoken
|
|
|
143
144
|
"gpt-4-": "cl100k_base", # e.g., gpt-4-0314, etc., plus gpt-4-32k
|
|
144
145
|
"gpt-3.5-turbo-": "cl100k_base", # e.g, gpt-3.5-turbo-0301, -0401, etc.
|
|
145
146
|
"gpt-35-turbo-": "cl100k_base", # Azure deployment name
|
|
147
|
+
"gpt-oss-": "o200k_harmony",
|
|
146
148
|
# fine-tuned
|
|
147
149
|
"ft:gpt-4o": "cl100k_base",
|
|
148
150
|
"ft:gpt-4": "cl100k_base",
|
metadata
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: tiktoken_ruby
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.0.
|
|
4
|
+
version: 0.0.14.1
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- IAPark
|
|
@@ -36,6 +36,7 @@ files:
|
|
|
36
36
|
- ".rspec"
|
|
37
37
|
- ".standard.yml"
|
|
38
38
|
- ".vscode/settings.json"
|
|
39
|
+
- CHANGELOG.md
|
|
39
40
|
- Cargo.lock
|
|
40
41
|
- Cargo.toml
|
|
41
42
|
- Gemfile
|
|
@@ -51,9 +52,7 @@ files:
|
|
|
51
52
|
- lib/tiktoken_ruby.rb
|
|
52
53
|
- lib/tiktoken_ruby/encoding.rb
|
|
53
54
|
- lib/tiktoken_ruby/version.rb
|
|
54
|
-
- script/release
|
|
55
55
|
- sig/tiktoken_ruby.rbs
|
|
56
|
-
- tiktoken_ruby.gemspec
|
|
57
56
|
homepage: https://github.com/IAPark/tiktoken_ruby
|
|
58
57
|
licenses:
|
|
59
58
|
- MIT
|
|
@@ -68,7 +67,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
|
68
67
|
requirements:
|
|
69
68
|
- - ">="
|
|
70
69
|
- !ruby/object:Gem::Version
|
|
71
|
-
version: 3.
|
|
70
|
+
version: 3.2.0
|
|
72
71
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
|
73
72
|
requirements:
|
|
74
73
|
- - ">="
|
data/script/release
DELETED
|
@@ -1,43 +0,0 @@
|
|
|
1
|
-
#!/usr/bin/env bash
|
|
2
|
-
|
|
3
|
-
set -e
|
|
4
|
-
|
|
5
|
-
if [ -z "${TIKTOKEN_PUBLISH_KEY}" ]; then
|
|
6
|
-
echo "Error: TIKTOKEN_PUBLISH_KEY is not set. This is the RubyGems API key to push the gem."
|
|
7
|
-
exit 1
|
|
8
|
-
fi
|
|
9
|
-
|
|
10
|
-
run_id=""
|
|
11
|
-
# Parse arguments
|
|
12
|
-
while [[ "$#" -gt 0 ]]; do
|
|
13
|
-
case $1 in
|
|
14
|
-
--run-id)
|
|
15
|
-
run_id="$2"
|
|
16
|
-
shift 2
|
|
17
|
-
;;
|
|
18
|
-
*)
|
|
19
|
-
echo "Unknown parameter passed: $1"
|
|
20
|
-
exit 1
|
|
21
|
-
;;
|
|
22
|
-
esac
|
|
23
|
-
done
|
|
24
|
-
|
|
25
|
-
if [ -z "${run_id}" ]; then
|
|
26
|
-
echo "Error: --run-id is not provided. Please provide the GitHub Action run id for the cross-compile workflow."
|
|
27
|
-
exit 1
|
|
28
|
-
fi
|
|
29
|
-
|
|
30
|
-
version=$(grep VERSION lib/tiktoken_ruby/version.rb | head -n 1 | cut -d'"' -f2)
|
|
31
|
-
echo "Building tiktoken_ruby v$version, using artifacts from run $run_id"
|
|
32
|
-
|
|
33
|
-
rm -rf pkg/cross-compiled
|
|
34
|
-
gh run download "$run_id" -D pkg/cross-compiled
|
|
35
|
-
|
|
36
|
-
for gem in pkg/cross-compiled/cross-gem-*/tiktoken_ruby-"$version"*.gem ; do
|
|
37
|
-
echo "Publishing $gem"
|
|
38
|
-
GEM_HOST_API_KEY="${TIKTOKEN_PUBLISH_KEY}" gem push "$gem" --host https://rubygems.org
|
|
39
|
-
done
|
|
40
|
-
|
|
41
|
-
# last but not least, the uncompiled gem
|
|
42
|
-
bundle exec rake package
|
|
43
|
-
GEM_HOST_API_KEY="${TIKTOKEN_PUBLISH_KEY}" gem push "pkg/tiktoken_ruby-$version.gem" --host https://rubygems.org
|
data/tiktoken_ruby.gemspec
DELETED
|
@@ -1,33 +0,0 @@
|
|
|
1
|
-
# frozen_string_literal: true
|
|
2
|
-
|
|
3
|
-
require_relative "lib/tiktoken_ruby/version"
|
|
4
|
-
|
|
5
|
-
Gem::Specification.new do |spec|
|
|
6
|
-
spec.name = "tiktoken_ruby"
|
|
7
|
-
spec.version = Tiktoken::VERSION
|
|
8
|
-
spec.authors = ["IAPark"]
|
|
9
|
-
spec.email = ["isaac.a.park@gmail.com"]
|
|
10
|
-
spec.summary = "Ruby wrapper for Tiktoken"
|
|
11
|
-
spec.description = "An unofficial Ruby wrapper for Tiktoken, " \
|
|
12
|
-
"a BPE tokenizer written by and used by OpenAI. It can be used to " \
|
|
13
|
-
"count the number of tokens in text before sending it to OpenAI APIs."
|
|
14
|
-
spec.homepage = "https://github.com/IAPark/tiktoken_ruby"
|
|
15
|
-
spec.license = "MIT"
|
|
16
|
-
spec.required_ruby_version = ">= 3.1.0"
|
|
17
|
-
spec.required_rubygems_version = ">= 3.4.0"
|
|
18
|
-
spec.platform = Gem::Platform::RUBY
|
|
19
|
-
|
|
20
|
-
spec.metadata["homepage_uri"] = spec.homepage
|
|
21
|
-
spec.metadata["source_code_uri"] = "https://github.com/IAPark/tiktoken_ruby"
|
|
22
|
-
spec.metadata["documentation_uri"] = "https://rubydoc.info/github/IAPark/tiktoken_ruby/main"
|
|
23
|
-
spec.files = Dir.chdir(__dir__) do
|
|
24
|
-
`git ls-files -z`.split("\x0").reject do |f|
|
|
25
|
-
(f == __FILE__) || f.match(%r{\A(?:(?:bin|test|spec|features)/|\.(?:git|circleci)|appveyor)})
|
|
26
|
-
end
|
|
27
|
-
end
|
|
28
|
-
spec.bindir = "exe"
|
|
29
|
-
spec.executables = spec.files.grep(%r{\Aexe/}) { |f| File.basename(f) }
|
|
30
|
-
spec.require_paths = ["lib"]
|
|
31
|
-
spec.extensions = ["ext/tiktoken_ruby/extconf.rb"]
|
|
32
|
-
spec.add_dependency "rb_sys", "~> 0.9"
|
|
33
|
-
end
|