tokenizers 0.3.2-arm64-darwin → 0.3.3-arm64-darwin

Sign up to get free protection for your applications and to get access to all the features.
Binary file
Binary file
Binary file
Binary file
@@ -0,0 +1,9 @@
1
+ module Tokenizers
2
+ module Decoders
3
+ class Strip
4
+ def self.new(content: " ", start: 0, stop: 0)
5
+ _new(content, start, stop)
6
+ end
7
+ end
8
+ end
9
+ end
@@ -1,7 +1,7 @@
1
1
  module Tokenizers
2
2
  module FromPretrained
3
3
  # for user agent
4
- TOKENIZERS_VERSION = "0.13.2"
4
+ TOKENIZERS_VERSION = "0.13.3"
5
5
 
6
6
  # use Ruby for downloads
7
7
  # this avoids the need to vendor OpenSSL on Linux
@@ -0,0 +1,9 @@
1
+ module Tokenizers
2
+ module Normalizers
3
+ class Prepend
4
+ def self.new(prepend: "▁")
5
+ _new(prepend)
6
+ end
7
+ end
8
+ end
9
+ end
@@ -1,3 +1,3 @@
1
1
  module Tokenizers
2
- VERSION = "0.3.2"
2
+ VERSION = "0.3.3"
3
3
  end
data/lib/tokenizers.rb CHANGED
@@ -9,6 +9,7 @@ end
9
9
  require_relative "tokenizers/decoders/bpe_decoder"
10
10
  require_relative "tokenizers/decoders/ctc"
11
11
  require_relative "tokenizers/decoders/metaspace"
12
+ require_relative "tokenizers/decoders/strip"
12
13
  require_relative "tokenizers/decoders/word_piece"
13
14
 
14
15
  # models
@@ -19,6 +20,7 @@ require_relative "tokenizers/models/unigram"
19
20
 
20
21
  # normalizers
21
22
  require_relative "tokenizers/normalizers/bert_normalizer"
23
+ require_relative "tokenizers/normalizers/prepend"
22
24
  require_relative "tokenizers/normalizers/strip"
23
25
 
24
26
  # pre-tokenizers
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: tokenizers
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.2
4
+ version: 0.3.3
5
5
  platform: arm64-darwin
6
6
  authors:
7
7
  - Andrew Kane
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2023-03-07 00:00:00.000000000 Z
11
+ date: 2023-04-09 00:00:00.000000000 Z
12
12
  dependencies: []
13
13
  description:
14
14
  email: andrew@ankane.org
@@ -31,6 +31,7 @@ files:
31
31
  - lib/tokenizers/decoders/bpe_decoder.rb
32
32
  - lib/tokenizers/decoders/ctc.rb
33
33
  - lib/tokenizers/decoders/metaspace.rb
34
+ - lib/tokenizers/decoders/strip.rb
34
35
  - lib/tokenizers/decoders/word_piece.rb
35
36
  - lib/tokenizers/encoding.rb
36
37
  - lib/tokenizers/from_pretrained.rb
@@ -39,6 +40,7 @@ files:
39
40
  - lib/tokenizers/models/word_level.rb
40
41
  - lib/tokenizers/models/word_piece.rb
41
42
  - lib/tokenizers/normalizers/bert_normalizer.rb
43
+ - lib/tokenizers/normalizers/prepend.rb
42
44
  - lib/tokenizers/normalizers/strip.rb
43
45
  - lib/tokenizers/pre_tokenizers/byte_level.rb
44
46
  - lib/tokenizers/pre_tokenizers/digits.rb