tokenizers 0.3.1 → 0.3.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +9 -5
- data/Cargo.lock +12 -7
- data/ext/tokenizers/Cargo.toml +1 -1
- data/lib/tokenizers/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 11fdc53989bc2285096bc5eb4a971426e153375ada43bc8c51d2a191b42fe02d
|
|
4
|
+
data.tar.gz: 92d4d8bef2c4013d5cf0d55bff30bc070ca6e59430f87b96f9147075216a1c1d
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 7c91c33078b6c5b23a6080908fa184e2222922f35bd6e1c439329525f4554f939cf98ca4dcd3017d858cdb6ec2a96f6671ce5dc425ca643d482b49b04af00f4c
|
|
7
|
+
data.tar.gz: 74aad6c458792570ace93107aacccb8a9df7b61228dd062bd7bcaf015a53d4f6b4b1fcace09c69bc959bd9b04598d315344d4edc7e6abbe1603f3b92d29ab711
|
data/CHANGELOG.md
CHANGED
|
@@ -1,8 +1,12 @@
|
|
|
1
|
-
## 0.3.1
|
|
1
|
+
## 0.3.2 (2023-03-06)
|
|
2
|
+
|
|
3
|
+
- Added precompiled gem for Linux x86-64 MUSL
|
|
4
|
+
|
|
5
|
+
## 0.3.1 (2023-02-08)
|
|
2
6
|
|
|
3
7
|
- Fixed error with Ruby 2.7
|
|
4
8
|
|
|
5
|
-
## 0.3.0 (
|
|
9
|
+
## 0.3.0 (2023-02-07)
|
|
6
10
|
|
|
7
11
|
- Added support for training tokenizers
|
|
8
12
|
- Added more methods to `Tokenizer`
|
|
@@ -11,20 +15,20 @@
|
|
|
11
15
|
- Changed `encode` method to include special tokens by default
|
|
12
16
|
- Changed how offsets are calculated for strings with multibyte characters
|
|
13
17
|
|
|
14
|
-
## 0.2.3 (
|
|
18
|
+
## 0.2.3 (2023-01-22)
|
|
15
19
|
|
|
16
20
|
- Added `add_special_tokens` option to `encode` method
|
|
17
21
|
- Added warning about `encode` method including special tokens by default in 0.3.0
|
|
18
22
|
- Added more methods to `Encoding`
|
|
19
23
|
- Fixed error with precompiled gem on Mac ARM
|
|
20
24
|
|
|
21
|
-
## 0.2.2 (
|
|
25
|
+
## 0.2.2 (2023-01-15)
|
|
22
26
|
|
|
23
27
|
- Added precompiled gem for Linux ARM
|
|
24
28
|
- Added `from_file` method
|
|
25
29
|
- Fixed error with precompiled gem on Linux x86-64
|
|
26
30
|
|
|
27
|
-
## 0.2.1 (
|
|
31
|
+
## 0.2.1 (2023-01-12)
|
|
28
32
|
|
|
29
33
|
- Added support for Ruby 3.2
|
|
30
34
|
|
data/Cargo.lock
CHANGED
|
@@ -353,7 +353,8 @@ checksum = "58093314a45e00c77d5c508f76e77c3396afbbc0d01506e7fae47b018bac2b1d"
|
|
|
353
353
|
[[package]]
|
|
354
354
|
name = "magnus"
|
|
355
355
|
version = "0.5.0"
|
|
356
|
-
source = "
|
|
356
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
357
|
+
checksum = "af37419a942477f606d227d0e6e92f3b68458bfc68fec3bc2629df6a2c1ccdf9"
|
|
357
358
|
dependencies = [
|
|
358
359
|
"magnus-macros",
|
|
359
360
|
"rb-sys",
|
|
@@ -362,8 +363,9 @@ dependencies = [
|
|
|
362
363
|
|
|
363
364
|
[[package]]
|
|
364
365
|
name = "magnus-macros"
|
|
365
|
-
version = "0.
|
|
366
|
-
source = "
|
|
366
|
+
version = "0.4.0"
|
|
367
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
368
|
+
checksum = "85aa71c9891b2732ff1157e1860a1ee578459fd25811fd3d72cc6e32b3fbdfea"
|
|
367
369
|
dependencies = [
|
|
368
370
|
"proc-macro2",
|
|
369
371
|
"quote",
|
|
@@ -552,22 +554,25 @@ dependencies = [
|
|
|
552
554
|
|
|
553
555
|
[[package]]
|
|
554
556
|
name = "rb-sys"
|
|
555
|
-
version = "0.9.
|
|
557
|
+
version = "0.9.65"
|
|
556
558
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
557
|
-
checksum = "
|
|
559
|
+
checksum = "e8fe617bad8e88fd7e5d6f432e35f09e5f94144dfb8e8ee4adde82fb920dc59b"
|
|
558
560
|
dependencies = [
|
|
559
561
|
"rb-sys-build",
|
|
560
562
|
]
|
|
561
563
|
|
|
562
564
|
[[package]]
|
|
563
565
|
name = "rb-sys-build"
|
|
564
|
-
version = "0.9.
|
|
566
|
+
version = "0.9.65"
|
|
565
567
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
566
|
-
checksum = "
|
|
568
|
+
checksum = "007e63597f91c711cbb299e60fecbdb6f5ad4a066d6a20c81943893f1584c895"
|
|
567
569
|
dependencies = [
|
|
568
570
|
"bindgen",
|
|
571
|
+
"lazy_static",
|
|
572
|
+
"quote",
|
|
569
573
|
"regex",
|
|
570
574
|
"shell-words",
|
|
575
|
+
"syn",
|
|
571
576
|
]
|
|
572
577
|
|
|
573
578
|
[[package]]
|
data/ext/tokenizers/Cargo.toml
CHANGED
|
@@ -10,7 +10,7 @@ publish = false
|
|
|
10
10
|
crate-type = ["cdylib"]
|
|
11
11
|
|
|
12
12
|
[dependencies]
|
|
13
|
-
magnus =
|
|
13
|
+
magnus = "0.5"
|
|
14
14
|
onig = { version = "6.0", default-features = false }
|
|
15
15
|
serde = { version = "1.0", features = ["rc", "derive"] }
|
|
16
16
|
|
data/lib/tokenizers/version.rb
CHANGED
metadata
CHANGED
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: tokenizers
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.3.1
|
|
4
|
+
version: 0.3.2
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Andrew Kane
|
|
8
8
|
autorequire:
|
|
9
9
|
bindir: bin
|
|
10
10
|
cert_chain: []
|
|
11
|
-
date: 2023-
|
|
11
|
+
date: 2023-03-07 00:00:00.000000000 Z
|
|
12
12
|
dependencies:
|
|
13
13
|
- !ruby/object:Gem::Dependency
|
|
14
14
|
name: rb_sys
|