tokenizers 0.6.0 → 0.6.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +4 -0
- data/Cargo.lock +3 -3
- data/ext/tokenizers/Cargo.toml +1 -1
- data/ext/tokenizers/src/tokenizer.rs +8 -10
- data/lib/tokenizers/version.rb +1 -1
- metadata +1 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: aa9c5791104b7c5df1a6a2afe9c071ad0c03308e3393cbb1f6d2be87385b03d5
|
4
|
+
data.tar.gz: 65fdbe5dff6259dfe13c9f3a0ee9a2330e1b4b11b872fdf98b8189f4c87232c0
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 2460479cc807467ff4c7b185aa2f49386a2465e2a8fb430acd6062210d522e550cd1b1aa401a9b902315cf51f0dcf0041c8e66cf3907b549a27bff7d8864228d
|
7
|
+
data.tar.gz: c908e1daf079a35ac2e9da93fd43c7a02373f3aeefa06335830bec223984ed44e9c2748c15d5637e0c4e122a20d0f9f891c86244ea6c1050bbc60ac29762a3a9
|
data/CHANGELOG.md
CHANGED
data/Cargo.lock
CHANGED
@@ -385,9 +385,9 @@ checksum = "b8dd856d451cc0da70e2ef2ce95a18e39a93b7558bedf10201ad28503f918568"
|
|
385
385
|
|
386
386
|
[[package]]
|
387
387
|
name = "magnus"
|
388
|
-
version = "0.8.0"
|
388
|
+
version = "0.8.1"
|
389
389
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
390
|
-
checksum = "
|
390
|
+
checksum = "bd2ac6e71886be00ac34db92aa732c793c5107c95191805b9a1c7e70e6d342e0"
|
391
391
|
dependencies = [
|
392
392
|
"magnus-macros",
|
393
393
|
"rb-sys",
|
@@ -784,7 +784,7 @@ dependencies = [
|
|
784
784
|
|
785
785
|
[[package]]
|
786
786
|
name = "tokenizers"
|
787
|
-
version = "0.6.0"
|
787
|
+
version = "0.6.1"
|
788
788
|
dependencies = [
|
789
789
|
"ahash",
|
790
790
|
"magnus",
|
data/ext/tokenizers/Cargo.toml
CHANGED
data/ext/tokenizers/src/tokenizer.rs
CHANGED
@@ -372,16 +372,14 @@ impl RbTokenizer {
|
|
372
372
|
Ok(input)
|
373
373
|
})
|
374
374
|
.collect::<RbResult<Vec<tk::EncodeInput>>>()?;
|
375
|
-
|
376
|
-
|
377
|
-
|
378
|
-
|
379
|
-
|
380
|
-
.map(
|
381
|
-
|
382
|
-
|
383
|
-
.map_err(RbError::from),
|
384
|
-
))
|
375
|
+
rb_self
|
376
|
+
.tokenizer
|
377
|
+
.borrow()
|
378
|
+
.encode_batch_char_offsets(input, add_special_tokens)
|
379
|
+
.map(|encodings| {
|
380
|
+
ruby.ary_from_iter(encodings.into_iter().map(Into::<RbEncoding>::into))
|
381
|
+
})
|
382
|
+
.map_err(RbError::from)
|
385
383
|
}
|
386
384
|
|
387
385
|
pub fn decode(&self, ids: Vec<u32>, skip_special_tokens: bool) -> RbResult<String> {
|
data/lib/tokenizers/version.rb
CHANGED