tokenizers 0.3.0-x86_64-linux → 0.3.2-x86_64-linux
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +12 -4
- data/Cargo.lock +13 -8
- data/LICENSE-THIRD-PARTY.txt +3 -29
- data/lib/tokenizers/2.7/tokenizers.so +0 -0
- data/lib/tokenizers/3.0/tokenizers.so +0 -0
- data/lib/tokenizers/3.1/tokenizers.so +0 -0
- data/lib/tokenizers/3.2/tokenizers.so +0 -0
- data/lib/tokenizers/from_pretrained.rb +1 -1
- data/lib/tokenizers/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 5c255ccee12a9f089e4c55c80c8874d2eb236bd8e2d2bc5b07a56b69ff553415
|
4
|
+
data.tar.gz: fe58ce9b34f3220aec3ae2f8090caf0cc519b3dacc187328a7b364b38f3d430f
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 6745dcdda9eb9aecbf7fc31755107de76110ab6ed9d1ffcedbc63e6e034c7d4276ba6de959763ba4463dcbb2ec8cd1783680f558a733977c6799cd427064b6af
|
7
|
+
data.tar.gz: 959f49d956aafee7556a38b724b812b7c49ff2e22f55b0770ff069bafd7dcf63419d6dc33b65cef71da5b45b020534d6905d6770e95ce6142b9f57e59c3582bc
|
data/CHANGELOG.md
CHANGED
@@ -1,4 +1,12 @@
|
|
1
|
-
## 0.3.
|
1
|
+
## 0.3.2 (2023-03-06)
|
2
|
+
|
3
|
+
- Added precompiled gem for Linux x86-64 MUSL
|
4
|
+
|
5
|
+
## 0.3.1 (2023-02-08)
|
6
|
+
|
7
|
+
- Fixed error with Ruby 2.7
|
8
|
+
|
9
|
+
## 0.3.0 (2023-02-07)
|
2
10
|
|
3
11
|
- Added support for training tokenizers
|
4
12
|
- Added more methods to `Tokenizer`
|
@@ -7,20 +15,20 @@
|
|
7
15
|
- Changed `encode` method to include special tokens by default
|
8
16
|
- Changed how offsets are calculated for strings with multibyte characters
|
9
17
|
|
10
|
-
## 0.2.3 (
|
18
|
+
## 0.2.3 (2023-01-22)
|
11
19
|
|
12
20
|
- Added `add_special_tokens` option to `encode` method
|
13
21
|
- Added warning about `encode` method including special tokens by default in 0.3.0
|
14
22
|
- Added more methods to `Encoding`
|
15
23
|
- Fixed error with precompiled gem on Mac ARM
|
16
24
|
|
17
|
-
## 0.2.2 (
|
25
|
+
## 0.2.2 (2023-01-15)
|
18
26
|
|
19
27
|
- Added precompiled gem for Linux ARM
|
20
28
|
- Added `from_file` method
|
21
29
|
- Fixed error with precompiled gem on Linux x86-64
|
22
30
|
|
23
|
-
## 0.2.1 (
|
31
|
+
## 0.2.1 (2023-01-12)
|
24
32
|
|
25
33
|
- Added support for Ruby 3.2
|
26
34
|
|
data/Cargo.lock
CHANGED
@@ -353,7 +353,8 @@ checksum = "58093314a45e00c77d5c508f76e77c3396afbbc0d01506e7fae47b018bac2b1d"
|
|
353
353
|
[[package]]
|
354
354
|
name = "magnus"
|
355
355
|
version = "0.5.0"
|
356
|
-
source = "
|
356
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
357
|
+
checksum = "af37419a942477f606d227d0e6e92f3b68458bfc68fec3bc2629df6a2c1ccdf9"
|
357
358
|
dependencies = [
|
358
359
|
"magnus-macros",
|
359
360
|
"rb-sys",
|
@@ -362,8 +363,9 @@ dependencies = [
|
|
362
363
|
|
363
364
|
[[package]]
|
364
365
|
name = "magnus-macros"
|
365
|
-
version = "0.
|
366
|
-
source = "
|
366
|
+
version = "0.4.0"
|
367
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
368
|
+
checksum = "85aa71c9891b2732ff1157e1860a1ee578459fd25811fd3d72cc6e32b3fbdfea"
|
367
369
|
dependencies = [
|
368
370
|
"proc-macro2",
|
369
371
|
"quote",
|
@@ -552,22 +554,25 @@ dependencies = [
|
|
552
554
|
|
553
555
|
[[package]]
|
554
556
|
name = "rb-sys"
|
555
|
-
version = "0.9.
|
557
|
+
version = "0.9.65"
|
556
558
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
557
|
-
checksum = "
|
559
|
+
checksum = "e8fe617bad8e88fd7e5d6f432e35f09e5f94144dfb8e8ee4adde82fb920dc59b"
|
558
560
|
dependencies = [
|
559
561
|
"rb-sys-build",
|
560
562
|
]
|
561
563
|
|
562
564
|
[[package]]
|
563
565
|
name = "rb-sys-build"
|
564
|
-
version = "0.9.
|
566
|
+
version = "0.9.65"
|
565
567
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
566
|
-
checksum = "
|
568
|
+
checksum = "007e63597f91c711cbb299e60fecbdb6f5ad4a066d6a20c81943893f1584c895"
|
567
569
|
dependencies = [
|
568
570
|
"bindgen",
|
571
|
+
"lazy_static",
|
572
|
+
"quote",
|
569
573
|
"regex",
|
570
574
|
"shell-words",
|
575
|
+
"syn",
|
571
576
|
]
|
572
577
|
|
573
578
|
[[package]]
|
@@ -711,7 +716,7 @@ dependencies = [
|
|
711
716
|
|
712
717
|
[[package]]
|
713
718
|
name = "tokenizers"
|
714
|
-
version = "0.
|
719
|
+
version = "0.3.1"
|
715
720
|
dependencies = [
|
716
721
|
"magnus",
|
717
722
|
"onig",
|
data/LICENSE-THIRD-PARTY.txt
CHANGED
@@ -154,7 +154,7 @@ magnus v0.5.0
|
|
154
154
|
https://github.com/matsadler/magnus
|
155
155
|
MIT
|
156
156
|
|
157
|
-
magnus-macros v0.
|
157
|
+
magnus-macros v0.4.0
|
158
158
|
https://github.com/matsadler/magnus
|
159
159
|
MIT
|
160
160
|
|
@@ -242,11 +242,11 @@ rayon-core v1.10.2
|
|
242
242
|
https://github.com/rayon-rs/rayon
|
243
243
|
MIT OR Apache-2.0
|
244
244
|
|
245
|
-
rb-sys v0.9.
|
245
|
+
rb-sys v0.9.65
|
246
246
|
https://github.com/oxidize-rb/rb-sys
|
247
247
|
MIT OR Apache-2.0
|
248
248
|
|
249
|
-
rb-sys-build v0.9.
|
249
|
+
rb-sys-build v0.9.65
|
250
250
|
https://github.com/oxidize-rb/rb-sys
|
251
251
|
MIT OR Apache-2.0
|
252
252
|
|
@@ -7795,32 +7795,6 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
7795
7795
|
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
7796
7796
|
SOFTWARE.
|
7797
7797
|
|
7798
|
-
================================================================================
|
7799
|
-
magnus magnus-macros/LICENSE
|
7800
|
-
================================================================================
|
7801
|
-
|
7802
|
-
MIT License
|
7803
|
-
|
7804
|
-
Copyright (c) 2022, 2021 Matthew Sadler
|
7805
|
-
|
7806
|
-
Permission is hereby granted, free of charge, to any person obtaining a copy
|
7807
|
-
of this software and associated documentation files (the "Software"), to deal
|
7808
|
-
in the Software without restriction, including without limitation the rights
|
7809
|
-
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
7810
|
-
copies of the Software, and to permit persons to whom the Software is
|
7811
|
-
furnished to do so, subject to the following conditions:
|
7812
|
-
|
7813
|
-
The above copyright notice and this permission notice shall be included in all
|
7814
|
-
copies or substantial portions of the Software.
|
7815
|
-
|
7816
|
-
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
7817
|
-
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
7818
|
-
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
7819
|
-
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
7820
|
-
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
7821
|
-
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
7822
|
-
SOFTWARE.
|
7823
|
-
|
7824
7798
|
================================================================================
|
7825
7799
|
magnus-macros LICENSE
|
7826
7800
|
================================================================================
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
@@ -44,7 +44,7 @@ module Tokenizers
|
|
44
44
|
def cached_path(cache_dir, url, options)
|
45
45
|
fsum = Digest::SHA256.hexdigest(url)
|
46
46
|
meta_paths = Dir[File.join(cache_dir, "#{fsum}.*.meta")]
|
47
|
-
meta = meta_paths.map { |f| JSON.
|
47
|
+
meta = meta_paths.map { |f| JSON.parse(File.read(f)) }.max_by { |m| m["creation_time"] }
|
48
48
|
etag = meta["etag"] if meta
|
49
49
|
|
50
50
|
if etag
|
data/lib/tokenizers/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: tokenizers
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.3.
|
4
|
+
version: 0.3.2
|
5
5
|
platform: x86_64-linux
|
6
6
|
authors:
|
7
7
|
- Andrew Kane
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2023-
|
11
|
+
date: 2023-03-07 00:00:00.000000000 Z
|
12
12
|
dependencies: []
|
13
13
|
description:
|
14
14
|
email: andrew@ankane.org
|