tokenizers 0.5.2-x86_64-linux-musl → 0.5.3-x86_64-linux-musl

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 952e7aa7504f266d818ec8cff8c9bd81d890bcb57c07113650fbfcd0875fb3b9
4
- data.tar.gz: 6a0a3915a1076b7675b4ffcc3e55447906ffc1e378c9a95d82132f40c76034e7
3
+ metadata.gz: fce129f3910c54922d726c2dfa4f501eee8e41cc35374636663219a14fae5556
4
+ data.tar.gz: a8c7931b5eb452264cd4c5901d194bc0ba955d8857078e5216544edd15db0d5f
5
5
  SHA512:
6
- metadata.gz: cd2390e33c79d8b6528199c8ca9af7727b1bf63f58fb1a4ce074caa243ab796dcf4cd77eab26f2f76e872d68ae29f169c616128affad49a39147717b3d01b428
7
- data.tar.gz: 4e8baa0e28f5218480baa55320c4afd0db653f5ebd42b6e60b975d0a23a713ca3afd6f8666fb40a52017355e385178100946f50f9e6308910c5f8e564cfa9655
6
+ metadata.gz: '08fa0d532d72de71ace9d071cce5c10d08a6c7f119381c6476a7eeeadd5968a0f37a30897c91e6e1dc975122d4899161ed9bff181169eed0940f1d7fa0363405'
7
+ data.tar.gz: 1ab0ded21ce97e9bc5de491d42c880fbb2e186a9725546db2fb86ef0fcebb64d8e92ace94ce2cbda0540cc07a6c806fae8ae359d82b71c2998c3799a59b266a1
data/CHANGELOG.md CHANGED
@@ -1,3 +1,8 @@
1
+ ## 0.5.3 (2024-09-17)
2
+
3
+ - Added `AddedToken` class
4
+ - Added precompiled gem for Windows
5
+
1
6
  ## 0.5.2 (2024-08-26)
2
7
 
3
8
  - Added `from_str` method to `Tokenizer`
data/Cargo.lock CHANGED
@@ -724,7 +724,7 @@ dependencies = [
724
724
 
725
725
  [[package]]
726
726
  name = "tokenizers"
727
- version = "0.5.2"
727
+ version = "0.5.3"
728
728
  dependencies = [
729
729
  "magnus",
730
730
  "onig",
Binary file
Binary file
Binary file
@@ -0,0 +1,7 @@
1
+ module Tokenizers
2
+ class AddedToken
3
+ def self.new(content, **kwargs)
4
+ _new(content, kwargs)
5
+ end
6
+ end
7
+ end
@@ -1,3 +1,3 @@
1
1
  module Tokenizers
2
- VERSION = "0.5.2"
2
+ VERSION = "0.5.3"
3
3
  end
data/lib/tokenizers.rb CHANGED
@@ -42,6 +42,7 @@ require_relative "tokenizers/trainers/word_level_trainer"
42
42
  require_relative "tokenizers/trainers/word_piece_trainer"
43
43
 
44
44
  # other
45
+ require_relative "tokenizers/added_token"
45
46
  require_relative "tokenizers/char_bpe_tokenizer"
46
47
  require_relative "tokenizers/encoding"
47
48
  require_relative "tokenizers/from_pretrained"
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: tokenizers
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.5.2
4
+ version: 0.5.3
5
5
  platform: x86_64-linux-musl
6
6
  authors:
7
7
  - Andrew Kane
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2024-08-26 00:00:00.000000000 Z
11
+ date: 2024-09-17 00:00:00.000000000 Z
12
12
  dependencies: []
13
13
  description:
14
14
  email: andrew@ankane.org
@@ -26,6 +26,7 @@ files:
26
26
  - lib/tokenizers/3.1/tokenizers.so
27
27
  - lib/tokenizers/3.2/tokenizers.so
28
28
  - lib/tokenizers/3.3/tokenizers.so
29
+ - lib/tokenizers/added_token.rb
29
30
  - lib/tokenizers/char_bpe_tokenizer.rb
30
31
  - lib/tokenizers/decoders/bpe_decoder.rb
31
32
  - lib/tokenizers/decoders/ctc.rb