tokenizers 0.5.2-aarch64-linux-musl → 0.5.3-aarch64-linux-musl

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: af71e9e211a8a3bc3e1a05a2c5fa937d54b0a92dcc42dc784369d14b40bb6d5f
4
- data.tar.gz: d2dc41d5e280fde549f6d39ce96bc639150937321ca6630f54e245690597fd22
3
+ metadata.gz: '08700fb9bdcad5fd8f72a201ff9fb9d4d5738d0994ed395f8870696e3d2c8420'
4
+ data.tar.gz: ae92e4266649e977717c38ecfa0e7435e2a4cd5439f4156ebe9d60db05251413
5
5
  SHA512:
6
- metadata.gz: 6c9db5d94f60535efaaf09efeac122bd703c1bbb7328c5d9963606bdebb6e92bea4239233471ff635d0b7c4d126d195a8e9ecd54ad31a3077e16c78ffadfbf72
7
- data.tar.gz: 02ab410e4350520f433c6b08059b415d80316a2e763d51c4c81db88d52d823d1850a6fc62dd4694cab7cff7bca0ee35790db3ac05302a19573abc84973dd0ff5
6
+ metadata.gz: a1bbba481e1d2f8c65ced5020d2d1b53526d9b31f7927a62fd39fafddf254dbcd87a525860f1734e91c4ac102453b449832995428c5626181c0d8e4506401d9d
7
+ data.tar.gz: 93d5e32e26ab8e9277b148e331d35f9673708fed1d3dd92cc0ae9d9651d4fe7aeda1c850c3326b91899d4ce65fd2efc2806a6e8fd21e0a1846d0594614a48cc0
data/CHANGELOG.md CHANGED
@@ -1,3 +1,8 @@
1
+ ## 0.5.3 (2024-09-17)
2
+
3
+ - Added `AddedToken` class
4
+ - Added precompiled gem for Windows
5
+
1
6
  ## 0.5.2 (2024-08-26)
2
7
 
3
8
  - Added `from_str` method to `Tokenizer`
data/Cargo.lock CHANGED
@@ -724,7 +724,7 @@ dependencies = [
724
724
 
725
725
  [[package]]
726
726
  name = "tokenizers"
727
- version = "0.5.2"
727
+ version = "0.5.3"
728
728
  dependencies = [
729
729
  "magnus",
730
730
  "onig",
Binary file
Binary file
Binary file
@@ -0,0 +1,7 @@
1
+ module Tokenizers
2
+ class AddedToken
3
+ def self.new(content, **kwargs)
4
+ _new(content, kwargs)
5
+ end
6
+ end
7
+ end
@@ -1,3 +1,3 @@
1
1
  module Tokenizers
2
- VERSION = "0.5.2"
2
+ VERSION = "0.5.3"
3
3
  end
data/lib/tokenizers.rb CHANGED
@@ -42,6 +42,7 @@ require_relative "tokenizers/trainers/word_level_trainer"
42
42
  require_relative "tokenizers/trainers/word_piece_trainer"
43
43
 
44
44
  # other
45
+ require_relative "tokenizers/added_token"
45
46
  require_relative "tokenizers/char_bpe_tokenizer"
46
47
  require_relative "tokenizers/encoding"
47
48
  require_relative "tokenizers/from_pretrained"
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: tokenizers
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.5.2
4
+ version: 0.5.3
5
5
  platform: aarch64-linux-musl
6
6
  authors:
7
7
  - Andrew Kane
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2024-08-26 00:00:00.000000000 Z
11
+ date: 2024-09-17 00:00:00.000000000 Z
12
12
  dependencies: []
13
13
  description:
14
14
  email: andrew@ankane.org
@@ -26,6 +26,7 @@ files:
26
26
  - lib/tokenizers/3.1/tokenizers.so
27
27
  - lib/tokenizers/3.2/tokenizers.so
28
28
  - lib/tokenizers/3.3/tokenizers.so
29
+ - lib/tokenizers/added_token.rb
29
30
  - lib/tokenizers/char_bpe_tokenizer.rb
30
31
  - lib/tokenizers/decoders/bpe_decoder.rb
31
32
  - lib/tokenizers/decoders/ctc.rb