tokenizers 0.5.2-x86_64-darwin → 0.5.3-x86_64-darwin

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 17d7c28ef87243dd719120f643f68f7cfc005b712974beb9f9513a29c3706b68
4
- data.tar.gz: 218ab31a2944142938aa15f31eb0967563151d171accbb842b3d1dcf42cbca5f
3
+ metadata.gz: 2db1df1eec1cbf155a1f3ac69fff4b38ec3409cf758de3360f04be39e0a18168
4
+ data.tar.gz: d098227ba4ae83f845497ff78ca8eca54eec20f79c4e5ad3425e3dd92eeb693c
5
5
  SHA512:
6
- metadata.gz: c2e9f03bc0fb467a9187770c6825561c9d452ff853193fc176d5a4149fdb446f955e8aaf187a141ff98a4cbdc7d29085a291dfbd5bb9c46b684fa784b14cafb6
7
- data.tar.gz: 20a3477fa09718a5f2dcf270bfb4a0343b33a3567097a24728456d73152ca19941d6eaa0c5194830d8c837d8774afb019f729f31788ec4f9f8be84c9d4e0dbc4
6
+ metadata.gz: 85e8b49905169f90c821b57800c49b845428c08526255ce38d7841bdb2cc96f687b47b87e9fa4d82fdcd21b0a4357535ac07ca888b91612ce4227d9a064db5ed
7
+ data.tar.gz: b6a3debceba0ae20a49dca53a968e6cc0efabcbfe865652a8ad60da723b1399d6060ba80ba271ed4e489a9d29836585bc15fbaf60ecb44f5905daf2c5fca33fe
data/CHANGELOG.md CHANGED
@@ -1,3 +1,8 @@
1
+ ## 0.5.3 (2024-09-17)
2
+
3
+ - Added `AddedToken` class
4
+ - Added precompiled gem for Windows
5
+
1
6
  ## 0.5.2 (2024-08-26)
2
7
 
3
8
  - Added `from_str` method to `Tokenizer`
data/Cargo.lock CHANGED
@@ -724,7 +724,7 @@ dependencies = [
724
724
 
725
725
  [[package]]
726
726
  name = "tokenizers"
727
- version = "0.5.2"
727
+ version = "0.5.3"
728
728
  dependencies = [
729
729
  "magnus",
730
730
  "onig",
Binary file
Binary file
Binary file
@@ -0,0 +1,7 @@
1
+ module Tokenizers
2
+ class AddedToken
3
+ def self.new(content, **kwargs)
4
+ _new(content, kwargs)
5
+ end
6
+ end
7
+ end
@@ -1,3 +1,3 @@
1
1
  module Tokenizers
2
- VERSION = "0.5.2"
2
+ VERSION = "0.5.3"
3
3
  end
data/lib/tokenizers.rb CHANGED
@@ -42,6 +42,7 @@ require_relative "tokenizers/trainers/word_level_trainer"
42
42
  require_relative "tokenizers/trainers/word_piece_trainer"
43
43
 
44
44
  # other
45
+ require_relative "tokenizers/added_token"
45
46
  require_relative "tokenizers/char_bpe_tokenizer"
46
47
  require_relative "tokenizers/encoding"
47
48
  require_relative "tokenizers/from_pretrained"
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: tokenizers
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.5.2
4
+ version: 0.5.3
5
5
  platform: x86_64-darwin
6
6
  authors:
7
7
  - Andrew Kane
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2024-08-26 00:00:00.000000000 Z
11
+ date: 2024-09-17 00:00:00.000000000 Z
12
12
  dependencies: []
13
13
  description:
14
14
  email: andrew@ankane.org
@@ -26,6 +26,7 @@ files:
26
26
  - lib/tokenizers/3.1/tokenizers.bundle
27
27
  - lib/tokenizers/3.2/tokenizers.bundle
28
28
  - lib/tokenizers/3.3/tokenizers.bundle
29
+ - lib/tokenizers/added_token.rb
29
30
  - lib/tokenizers/char_bpe_tokenizer.rb
30
31
  - lib/tokenizers/decoders/bpe_decoder.rb
31
32
  - lib/tokenizers/decoders/ctc.rb