tokenizers 0.5.1-aarch64-linux-musl → 0.5.3-aarch64-linux-musl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +12 -0
- data/Cargo.lock +22 -19
- data/LICENSE-THIRD-PARTY.txt +9 -9
- data/lib/tokenizers/3.1/tokenizers.so +0 -0
- data/lib/tokenizers/3.2/tokenizers.so +0 -0
- data/lib/tokenizers/3.3/tokenizers.so +0 -0
- data/lib/tokenizers/added_token.rb +7 -0
- data/lib/tokenizers/version.rb +1 -1
- data/lib/tokenizers.rb +1 -0
- metadata +3 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: '08700fb9bdcad5fd8f72a201ff9fb9d4d5738d0994ed395f8870696e3d2c8420'
|
4
|
+
data.tar.gz: ae92e4266649e977717c38ecfa0e7435e2a4cd5439f4156ebe9d60db05251413
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: a1bbba481e1d2f8c65ced5020d2d1b53526d9b31f7927a62fd39fafddf254dbcd87a525860f1734e91c4ac102453b449832995428c5626181c0d8e4506401d9d
|
7
|
+
data.tar.gz: 93d5e32e26ab8e9277b148e331d35f9673708fed1d3dd92cc0ae9d9651d4fe7aeda1c850c3326b91899d4ce65fd2efc2806a6e8fd21e0a1846d0594614a48cc0
|
data/CHANGELOG.md
CHANGED
@@ -1,3 +1,15 @@
|
|
1
|
+
## 0.5.3 (2024-09-17)
|
2
|
+
|
3
|
+
- Added `AddedToken` class
|
4
|
+
- Added precompiled gem for Windows
|
5
|
+
|
6
|
+
## 0.5.2 (2024-08-26)
|
7
|
+
|
8
|
+
- Added `from_str` method to `Tokenizer`
|
9
|
+
- Added `model` and `model=` methods to `Tokenizer`
|
10
|
+
- Added `decoder`, `pre_tokenizer`, `post_processor`, and `normalizer` methods to `Tokenizer`
|
11
|
+
- Added `decode` method to `Decoder`
|
12
|
+
|
1
13
|
## 0.5.1 (2024-08-13)
|
2
14
|
|
3
15
|
- Updated Tokenizers to 0.20.0
|
data/Cargo.lock
CHANGED
@@ -57,9 +57,12 @@ checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b"
|
|
57
57
|
|
58
58
|
[[package]]
|
59
59
|
name = "cc"
|
60
|
-
version = "1.1.
|
60
|
+
version = "1.1.15"
|
61
61
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
62
|
-
checksum = "
|
62
|
+
checksum = "57b6a275aa2903740dc87da01c62040406b8812552e97129a63ea8850a17c6e6"
|
63
|
+
dependencies = [
|
64
|
+
"shlex",
|
65
|
+
]
|
63
66
|
|
64
67
|
[[package]]
|
65
68
|
name = "cexpr"
|
@@ -301,9 +304,9 @@ checksum = "830d08ce1d1d941e6b30645f1a0eb5643013d835ce3779a5fc208261dbe10f55"
|
|
301
304
|
|
302
305
|
[[package]]
|
303
306
|
name = "libc"
|
304
|
-
version = "0.2.
|
307
|
+
version = "0.2.158"
|
305
308
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
306
|
-
checksum = "
|
309
|
+
checksum = "d8adc4bb1803a324070e64a98ae98f38934d91957a99cfb3a43dcbc01bc56439"
|
307
310
|
|
308
311
|
[[package]]
|
309
312
|
name = "libloading"
|
@@ -475,9 +478,9 @@ dependencies = [
|
|
475
478
|
|
476
479
|
[[package]]
|
477
480
|
name = "quote"
|
478
|
-
version = "1.0.
|
481
|
+
version = "1.0.37"
|
479
482
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
480
|
-
checksum = "
|
483
|
+
checksum = "b5b9d34b8991d19d98081b46eacdd8eb58c6f2b201139f7c5f643cc155a633af"
|
481
484
|
dependencies = [
|
482
485
|
"proc-macro2",
|
483
486
|
]
|
@@ -545,18 +548,18 @@ dependencies = [
|
|
545
548
|
|
546
549
|
[[package]]
|
547
550
|
name = "rb-sys"
|
548
|
-
version = "0.9.
|
551
|
+
version = "0.9.102"
|
549
552
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
550
|
-
checksum = "
|
553
|
+
checksum = "df4dec4b1d304c3b308a2cd86b1216ea45dd4361f4e9fa056f108332d0a450c1"
|
551
554
|
dependencies = [
|
552
555
|
"rb-sys-build",
|
553
556
|
]
|
554
557
|
|
555
558
|
[[package]]
|
556
559
|
name = "rb-sys-build"
|
557
|
-
version = "0.9.
|
560
|
+
version = "0.9.102"
|
558
561
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
559
|
-
checksum = "
|
562
|
+
checksum = "1d71de3e29d174b8fb17b5d4470f27d7aa2605f8a9d05fda0d3aeff30e05a570"
|
560
563
|
dependencies = [
|
561
564
|
"bindgen",
|
562
565
|
"lazy_static",
|
@@ -622,18 +625,18 @@ checksum = "a3f0bf26fd526d2a95683cd0f87bf103b8539e2ca1ef48ce002d67aad59aa0b4"
|
|
622
625
|
|
623
626
|
[[package]]
|
624
627
|
name = "serde"
|
625
|
-
version = "1.0.
|
628
|
+
version = "1.0.209"
|
626
629
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
627
|
-
checksum = "
|
630
|
+
checksum = "99fce0ffe7310761ca6bf9faf5115afbc19688edd00171d81b1bb1b116c63e09"
|
628
631
|
dependencies = [
|
629
632
|
"serde_derive",
|
630
633
|
]
|
631
634
|
|
632
635
|
[[package]]
|
633
636
|
name = "serde_derive"
|
634
|
-
version = "1.0.
|
637
|
+
version = "1.0.209"
|
635
638
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
636
|
-
checksum = "
|
639
|
+
checksum = "a5831b979fd7b5439637af1752d535ff49f4860c0f341d1baeb6faf0f4242170"
|
637
640
|
dependencies = [
|
638
641
|
"proc-macro2",
|
639
642
|
"quote",
|
@@ -642,9 +645,9 @@ dependencies = [
|
|
642
645
|
|
643
646
|
[[package]]
|
644
647
|
name = "serde_json"
|
645
|
-
version = "1.0.
|
648
|
+
version = "1.0.127"
|
646
649
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
647
|
-
checksum = "
|
650
|
+
checksum = "8043c06d9f82bd7271361ed64f415fe5e12a77fdb52e573e7f06a516dea329ad"
|
648
651
|
dependencies = [
|
649
652
|
"itoa",
|
650
653
|
"memchr",
|
@@ -690,9 +693,9 @@ checksum = "7da8b5736845d9f2fcb837ea5d9e2628564b3b043a70948a3f0b778838c5fb4f"
|
|
690
693
|
|
691
694
|
[[package]]
|
692
695
|
name = "syn"
|
693
|
-
version = "2.0.
|
696
|
+
version = "2.0.76"
|
694
697
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
695
|
-
checksum = "
|
698
|
+
checksum = "578e081a14e0cefc3279b0472138c513f37b41a08d5a3cca9b6e4e8ceb6cd525"
|
696
699
|
dependencies = [
|
697
700
|
"proc-macro2",
|
698
701
|
"quote",
|
@@ -721,7 +724,7 @@ dependencies = [
|
|
721
724
|
|
722
725
|
[[package]]
|
723
726
|
name = "tokenizers"
|
724
|
-
version = "0.5.
|
727
|
+
version = "0.5.3"
|
725
728
|
dependencies = [
|
726
729
|
"magnus",
|
727
730
|
"onig",
|
data/LICENSE-THIRD-PARTY.txt
CHANGED
@@ -26,7 +26,7 @@ byteorder v1.5.0
|
|
26
26
|
https://github.com/BurntSushi/byteorder
|
27
27
|
Unlicense OR MIT
|
28
28
|
|
29
|
-
cc v1.1.
|
29
|
+
cc v1.1.15
|
30
30
|
https://github.com/rust-lang/cc-rs
|
31
31
|
MIT OR Apache-2.0
|
32
32
|
|
@@ -130,7 +130,7 @@ lazycell v1.3.0
|
|
130
130
|
https://github.com/indiv0/lazycell
|
131
131
|
MIT/Apache-2.0
|
132
132
|
|
133
|
-
libc v0.2.
|
133
|
+
libc v0.2.158
|
134
134
|
https://github.com/rust-lang/libc
|
135
135
|
MIT OR Apache-2.0
|
136
136
|
|
@@ -214,7 +214,7 @@ proc-macro2 v1.0.86
|
|
214
214
|
https://github.com/dtolnay/proc-macro2
|
215
215
|
MIT OR Apache-2.0
|
216
216
|
|
217
|
-
quote v1.0.
|
217
|
+
quote v1.0.37
|
218
218
|
https://github.com/dtolnay/quote
|
219
219
|
MIT OR Apache-2.0
|
220
220
|
|
@@ -242,11 +242,11 @@ rayon-core v1.12.1
|
|
242
242
|
https://github.com/rayon-rs/rayon
|
243
243
|
MIT OR Apache-2.0
|
244
244
|
|
245
|
-
rb-sys v0.9.
|
245
|
+
rb-sys v0.9.102
|
246
246
|
https://github.com/oxidize-rb/rb-sys
|
247
247
|
MIT OR Apache-2.0
|
248
248
|
|
249
|
-
rb-sys-build v0.9.
|
249
|
+
rb-sys-build v0.9.102
|
250
250
|
https://github.com/oxidize-rb/rb-sys
|
251
251
|
MIT OR Apache-2.0
|
252
252
|
|
@@ -278,15 +278,15 @@ seq-macro v0.3.5
|
|
278
278
|
https://github.com/dtolnay/seq-macro
|
279
279
|
MIT OR Apache-2.0
|
280
280
|
|
281
|
-
serde v1.0.
|
281
|
+
serde v1.0.209
|
282
282
|
https://serde.rs
|
283
283
|
MIT OR Apache-2.0
|
284
284
|
|
285
|
-
serde_derive v1.0.
|
285
|
+
serde_derive v1.0.209
|
286
286
|
https://serde.rs
|
287
287
|
MIT OR Apache-2.0
|
288
288
|
|
289
|
-
serde_json v1.0.
|
289
|
+
serde_json v1.0.127
|
290
290
|
https://github.com/serde-rs/json
|
291
291
|
MIT OR Apache-2.0
|
292
292
|
|
@@ -310,7 +310,7 @@ strsim v0.11.1
|
|
310
310
|
https://github.com/rapidfuzz/strsim-rs
|
311
311
|
MIT
|
312
312
|
|
313
|
-
syn v2.0.
|
313
|
+
syn v2.0.76
|
314
314
|
https://github.com/dtolnay/syn
|
315
315
|
MIT OR Apache-2.0
|
316
316
|
|
Binary file
|
Binary file
|
Binary file
|
data/lib/tokenizers/version.rb
CHANGED
data/lib/tokenizers.rb
CHANGED
@@ -42,6 +42,7 @@ require_relative "tokenizers/trainers/word_level_trainer"
|
|
42
42
|
require_relative "tokenizers/trainers/word_piece_trainer"
|
43
43
|
|
44
44
|
# other
|
45
|
+
require_relative "tokenizers/added_token"
|
45
46
|
require_relative "tokenizers/char_bpe_tokenizer"
|
46
47
|
require_relative "tokenizers/encoding"
|
47
48
|
require_relative "tokenizers/from_pretrained"
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: tokenizers
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.5.
|
4
|
+
version: 0.5.3
|
5
5
|
platform: aarch64-linux-musl
|
6
6
|
authors:
|
7
7
|
- Andrew Kane
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2024-
|
11
|
+
date: 2024-09-17 00:00:00.000000000 Z
|
12
12
|
dependencies: []
|
13
13
|
description:
|
14
14
|
email: andrew@ankane.org
|
@@ -26,6 +26,7 @@ files:
|
|
26
26
|
- lib/tokenizers/3.1/tokenizers.so
|
27
27
|
- lib/tokenizers/3.2/tokenizers.so
|
28
28
|
- lib/tokenizers/3.3/tokenizers.so
|
29
|
+
- lib/tokenizers/added_token.rb
|
29
30
|
- lib/tokenizers/char_bpe_tokenizer.rb
|
30
31
|
- lib/tokenizers/decoders/bpe_decoder.rb
|
31
32
|
- lib/tokenizers/decoders/ctc.rb
|