tokenizers 0.3.2-arm64-darwin → 0.3.3-arm64-darwin
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +6 -0
- data/Cargo.lock +125 -90
- data/LICENSE-THIRD-PARTY.txt +1669 -1491
- data/lib/tokenizers/2.7/tokenizers.bundle +0 -0
- data/lib/tokenizers/3.0/tokenizers.bundle +0 -0
- data/lib/tokenizers/3.1/tokenizers.bundle +0 -0
- data/lib/tokenizers/3.2/tokenizers.bundle +0 -0
- data/lib/tokenizers/decoders/strip.rb +9 -0
- data/lib/tokenizers/from_pretrained.rb +1 -1
- data/lib/tokenizers/normalizers/prepend.rb +9 -0
- data/lib/tokenizers/version.rb +1 -1
- data/lib/tokenizers.rb +2 -0
- metadata +4 -2
Binary file
|
Binary file
|
Binary file
|
Binary file
|
data/lib/tokenizers/version.rb
CHANGED
data/lib/tokenizers.rb
CHANGED
@@ -9,6 +9,7 @@ end
|
|
9
9
|
require_relative "tokenizers/decoders/bpe_decoder"
|
10
10
|
require_relative "tokenizers/decoders/ctc"
|
11
11
|
require_relative "tokenizers/decoders/metaspace"
|
12
|
+
require_relative "tokenizers/decoders/strip"
|
12
13
|
require_relative "tokenizers/decoders/word_piece"
|
13
14
|
|
14
15
|
# models
|
@@ -19,6 +20,7 @@ require_relative "tokenizers/models/unigram"
|
|
19
20
|
|
20
21
|
# normalizers
|
21
22
|
require_relative "tokenizers/normalizers/bert_normalizer"
|
23
|
+
require_relative "tokenizers/normalizers/prepend"
|
22
24
|
require_relative "tokenizers/normalizers/strip"
|
23
25
|
|
24
26
|
# pre-tokenizers
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: tokenizers
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.3.2
|
4
|
+
version: 0.3.3
|
5
5
|
platform: arm64-darwin
|
6
6
|
authors:
|
7
7
|
- Andrew Kane
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2023-
|
11
|
+
date: 2023-04-09 00:00:00.000000000 Z
|
12
12
|
dependencies: []
|
13
13
|
description:
|
14
14
|
email: andrew@ankane.org
|
@@ -31,6 +31,7 @@ files:
|
|
31
31
|
- lib/tokenizers/decoders/bpe_decoder.rb
|
32
32
|
- lib/tokenizers/decoders/ctc.rb
|
33
33
|
- lib/tokenizers/decoders/metaspace.rb
|
34
|
+
- lib/tokenizers/decoders/strip.rb
|
34
35
|
- lib/tokenizers/decoders/word_piece.rb
|
35
36
|
- lib/tokenizers/encoding.rb
|
36
37
|
- lib/tokenizers/from_pretrained.rb
|
@@ -39,6 +40,7 @@ files:
|
|
39
40
|
- lib/tokenizers/models/word_level.rb
|
40
41
|
- lib/tokenizers/models/word_piece.rb
|
41
42
|
- lib/tokenizers/normalizers/bert_normalizer.rb
|
43
|
+
- lib/tokenizers/normalizers/prepend.rb
|
42
44
|
- lib/tokenizers/normalizers/strip.rb
|
43
45
|
- lib/tokenizers/pre_tokenizers/byte_level.rb
|
44
46
|
- lib/tokenizers/pre_tokenizers/digits.rb
|