tokenizers 0.5.2-x86_64-linux → 0.5.3-x86_64-linux

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: d8f65354a37cb71e55cf35e34146ef535a929dc9d1fe02db4e5609e10e243dfe
4
- data.tar.gz: 0a00b89865d9d7ecb7453a5554bd8a83a2f18755b8f4f6fcf3df81e880f83e4d
3
+ metadata.gz: c5a23e4520e57dd74f97517733d0916919493906a2fa82cfc037a76ff203113d
4
+ data.tar.gz: d2cbc6265ac9d74a5724178da71d961ec81a0872c371839e22f1a3c2b097d637
5
5
  SHA512:
6
- metadata.gz: f36d33f63a215b79319ff6570e8fab37587bc4fd5c69e8f4052cd642eaa64bb8d81bcca8d526186e7e124f124bd2d184795cf2ca2c2171159b743a456e42c527
7
- data.tar.gz: b75812b4d6f99d7f43e246565bf7f061fc71db127db564bd459d2d06ec676ae9c4233807bde6e4f168b1d5df0cd3d1cb4c6f44d3968a762cc3aead2325ea2e29
6
+ metadata.gz: de6f0db2c89c0f3476ac3a1f11d4c43387e8bdb9bd3b5790ed2be0c91ed9733ff9db8009d9f51c39d786f83e8e6bdde3af72e13d406ce6e644103fa260d5e97f
7
+ data.tar.gz: f9ebdc7f298ff9c8f39f63acbd861515b3c7a3aa7929973498be040aae27f9235ee3256f72b66f13a3bccfe658db9e10cb922bef7b77a531dd5e0d1babebd25f
data/CHANGELOG.md CHANGED
@@ -1,3 +1,8 @@
1
+ ## 0.5.3 (2024-09-17)
2
+
3
+ - Added `AddedToken` class
4
+ - Added precompiled gem for Windows
5
+
1
6
  ## 0.5.2 (2024-08-26)
2
7
 
3
8
  - Added `from_str` method to `Tokenizer`
data/Cargo.lock CHANGED
@@ -724,7 +724,7 @@ dependencies = [
724
724
 
725
725
  [[package]]
726
726
  name = "tokenizers"
727
- version = "0.5.2"
727
+ version = "0.5.3"
728
728
  dependencies = [
729
729
  "magnus",
730
730
  "onig",
Binary file
Binary file
Binary file
@@ -0,0 +1,7 @@
1
+ module Tokenizers
2
+ class AddedToken
3
+ def self.new(content, **kwargs)
4
+ _new(content, kwargs)
5
+ end
6
+ end
7
+ end
@@ -1,3 +1,3 @@
1
1
  module Tokenizers
2
- VERSION = "0.5.2"
2
+ VERSION = "0.5.3"
3
3
  end
data/lib/tokenizers.rb CHANGED
@@ -42,6 +42,7 @@ require_relative "tokenizers/trainers/word_level_trainer"
42
42
  require_relative "tokenizers/trainers/word_piece_trainer"
43
43
 
44
44
  # other
45
+ require_relative "tokenizers/added_token"
45
46
  require_relative "tokenizers/char_bpe_tokenizer"
46
47
  require_relative "tokenizers/encoding"
47
48
  require_relative "tokenizers/from_pretrained"
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: tokenizers
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.5.2
4
+ version: 0.5.3
5
5
  platform: x86_64-linux
6
6
  authors:
7
7
  - Andrew Kane
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2024-08-26 00:00:00.000000000 Z
11
+ date: 2024-09-17 00:00:00.000000000 Z
12
12
  dependencies: []
13
13
  description:
14
14
  email: andrew@ankane.org
@@ -26,6 +26,7 @@ files:
26
26
  - lib/tokenizers/3.1/tokenizers.so
27
27
  - lib/tokenizers/3.2/tokenizers.so
28
28
  - lib/tokenizers/3.3/tokenizers.so
29
+ - lib/tokenizers/added_token.rb
29
30
  - lib/tokenizers/char_bpe_tokenizer.rb
30
31
  - lib/tokenizers/decoders/bpe_decoder.rb
31
32
  - lib/tokenizers/decoders/ctc.rb