tokenizers 0.5.2-arm64-darwin → 0.5.3-arm64-darwin

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: a00c67519f2c2bb09c0fad42f396006a341be62b41d751096ef772158ed30c7d
4
- data.tar.gz: 977b690303058422c76c41ebde6a8fd9981063cb88423aff535faf8a5c31d5a1
3
+ metadata.gz: dbf96bf34bc5b762a120c8bb0d9d20b637eccaf31b1650d45b46c722b4c7f1b4
4
+ data.tar.gz: 056b0d1dd8d14dbb52c311caa9aba9973ac9ea0ea3b9e26377e2fc69cc4ce598
5
5
  SHA512:
6
- metadata.gz: 7c7ac23930faa7c58cb3c1f282c3fafb1937bc097facaaa45cfdd74f7dc0c59dee57896bfbbd5fd052e6ae3bc849affb4109dbc59a2a75ee4c4f9e644c441c65
7
- data.tar.gz: 9bd2569f96710bc58aeeabd39d991ff56f13682dd86a271cc03dad2205e2a6732bb60c9eab06138405c2cf7d77df0196321d6cf7df6eac6b4f74caabb6d0a9e6
6
+ metadata.gz: c4488747d73279ac570b62a9afa3c8d7751d8b5a30684142a7879538a1450a31955b87cf498c7e671e80a92671166fb210ff992d04d3fa76e21d284884bef86e
7
+ data.tar.gz: 97151485dc583f4dfd9bd4cb4457dd411c1bf9a8953800fe28d532d2ac300a6214220f09a06b8b4e3fb052067f159d010b59feb5cc1e5396225f3d248206d469
data/CHANGELOG.md CHANGED
@@ -1,3 +1,8 @@
1
+ ## 0.5.3 (2024-09-17)
2
+
3
+ - Added `AddedToken` class
4
+ - Added precompiled gem for Windows
5
+
1
6
  ## 0.5.2 (2024-08-26)
2
7
 
3
8
  - Added `from_str` method to `Tokenizer`
data/Cargo.lock CHANGED
@@ -724,7 +724,7 @@ dependencies = [
724
724
 
725
725
  [[package]]
726
726
  name = "tokenizers"
727
- version = "0.5.2"
727
+ version = "0.5.3"
728
728
  dependencies = [
729
729
  "magnus",
730
730
  "onig",
Binary file
Binary file
Binary file
@@ -0,0 +1,7 @@
1
+ module Tokenizers
2
+ class AddedToken
3
+ def self.new(content, **kwargs)
4
+ _new(content, kwargs)
5
+ end
6
+ end
7
+ end
@@ -1,3 +1,3 @@
1
1
  module Tokenizers
2
- VERSION = "0.5.2"
2
+ VERSION = "0.5.3"
3
3
  end
data/lib/tokenizers.rb CHANGED
@@ -42,6 +42,7 @@ require_relative "tokenizers/trainers/word_level_trainer"
42
42
  require_relative "tokenizers/trainers/word_piece_trainer"
43
43
 
44
44
  # other
45
+ require_relative "tokenizers/added_token"
45
46
  require_relative "tokenizers/char_bpe_tokenizer"
46
47
  require_relative "tokenizers/encoding"
47
48
  require_relative "tokenizers/from_pretrained"
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: tokenizers
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.5.2
4
+ version: 0.5.3
5
5
  platform: arm64-darwin
6
6
  authors:
7
7
  - Andrew Kane
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2024-08-26 00:00:00.000000000 Z
11
+ date: 2024-09-17 00:00:00.000000000 Z
12
12
  dependencies: []
13
13
  description:
14
14
  email: andrew@ankane.org
@@ -26,6 +26,7 @@ files:
26
26
  - lib/tokenizers/3.1/tokenizers.bundle
27
27
  - lib/tokenizers/3.2/tokenizers.bundle
28
28
  - lib/tokenizers/3.3/tokenizers.bundle
29
+ - lib/tokenizers/added_token.rb
29
30
  - lib/tokenizers/char_bpe_tokenizer.rb
30
31
  - lib/tokenizers/decoders/bpe_decoder.rb
31
32
  - lib/tokenizers/decoders/ctc.rb