tokenizers 0.3.2-arm64-darwin → 0.3.3-arm64-darwin
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
 - data/CHANGELOG.md +6 -0
 - data/Cargo.lock +125 -90
 - data/LICENSE-THIRD-PARTY.txt +1669 -1491
 - data/lib/tokenizers/2.7/tokenizers.bundle +0 -0
 - data/lib/tokenizers/3.0/tokenizers.bundle +0 -0
 - data/lib/tokenizers/3.1/tokenizers.bundle +0 -0
 - data/lib/tokenizers/3.2/tokenizers.bundle +0 -0
 - data/lib/tokenizers/decoders/strip.rb +9 -0
 - data/lib/tokenizers/from_pretrained.rb +1 -1
 - data/lib/tokenizers/normalizers/prepend.rb +9 -0
 - data/lib/tokenizers/version.rb +1 -1
 - data/lib/tokenizers.rb +2 -0
 - metadata +4 -2
 
| 
         Binary file 
     | 
| 
         Binary file 
     | 
| 
         Binary file 
     | 
| 
         Binary file 
     | 
    
        data/lib/tokenizers/version.rb
    CHANGED
    
    
    
        data/lib/tokenizers.rb
    CHANGED
    
    | 
         @@ -9,6 +9,7 @@ end 
     | 
|
| 
       9 
9 
     | 
    
         
             
            require_relative "tokenizers/decoders/bpe_decoder"
         
     | 
| 
       10 
10 
     | 
    
         
             
            require_relative "tokenizers/decoders/ctc"
         
     | 
| 
       11 
11 
     | 
    
         
             
            require_relative "tokenizers/decoders/metaspace"
         
     | 
| 
      
 12 
     | 
    
         
            +
            require_relative "tokenizers/decoders/strip"
         
     | 
| 
       12 
13 
     | 
    
         
             
            require_relative "tokenizers/decoders/word_piece"
         
     | 
| 
       13 
14 
     | 
    
         | 
| 
       14 
15 
     | 
    
         
             
            # models
         
     | 
| 
         @@ -19,6 +20,7 @@ require_relative "tokenizers/models/unigram" 
     | 
|
| 
       19 
20 
     | 
    
         | 
| 
       20 
21 
     | 
    
         
             
            # normalizers
         
     | 
| 
       21 
22 
     | 
    
         
             
            require_relative "tokenizers/normalizers/bert_normalizer"
         
     | 
| 
      
 23 
     | 
    
         
            +
            require_relative "tokenizers/normalizers/prepend"
         
     | 
| 
       22 
24 
     | 
    
         
             
            require_relative "tokenizers/normalizers/strip"
         
     | 
| 
       23 
25 
     | 
    
         | 
| 
       24 
26 
     | 
    
         
             
            # pre-tokenizers
         
     | 
    
        metadata
    CHANGED
    
    | 
         @@ -1,14 +1,14 @@ 
     | 
|
| 
       1 
1 
     | 
    
         
             
            --- !ruby/object:Gem::Specification
         
     | 
| 
       2 
2 
     | 
    
         
             
            name: tokenizers
         
     | 
| 
       3 
3 
     | 
    
         
             
            version: !ruby/object:Gem::Version
         
     | 
| 
       4 
     | 
    
         
            -
              version: 0.3.2
     | 
| 
      
 4 
     | 
    
         
            +
              version: 0.3.3
         
     | 
| 
       5 
5 
     | 
    
         
             
            platform: arm64-darwin
         
     | 
| 
       6 
6 
     | 
    
         
             
            authors:
         
     | 
| 
       7 
7 
     | 
    
         
             
            - Andrew Kane
         
     | 
| 
       8 
8 
     | 
    
         
             
            autorequire: 
         
     | 
| 
       9 
9 
     | 
    
         
             
            bindir: bin
         
     | 
| 
       10 
10 
     | 
    
         
             
            cert_chain: []
         
     | 
| 
       11 
     | 
    
         
            -
            date: 2023- 
     | 
| 
      
 11 
     | 
    
         
            +
            date: 2023-04-09 00:00:00.000000000 Z
         
     | 
| 
       12 
12 
     | 
    
         
             
            dependencies: []
         
     | 
| 
       13 
13 
     | 
    
         
             
            description: 
         
     | 
| 
       14 
14 
     | 
    
         
             
            email: andrew@ankane.org
         
     | 
| 
         @@ -31,6 +31,7 @@ files: 
     | 
|
| 
       31 
31 
     | 
    
         
             
            - lib/tokenizers/decoders/bpe_decoder.rb
         
     | 
| 
       32 
32 
     | 
    
         
             
            - lib/tokenizers/decoders/ctc.rb
         
     | 
| 
       33 
33 
     | 
    
         
             
            - lib/tokenizers/decoders/metaspace.rb
         
     | 
| 
      
 34 
     | 
    
         
            +
            - lib/tokenizers/decoders/strip.rb
         
     | 
| 
       34 
35 
     | 
    
         
             
            - lib/tokenizers/decoders/word_piece.rb
         
     | 
| 
       35 
36 
     | 
    
         
             
            - lib/tokenizers/encoding.rb
         
     | 
| 
       36 
37 
     | 
    
         
             
            - lib/tokenizers/from_pretrained.rb
         
     | 
| 
         @@ -39,6 +40,7 @@ files: 
     | 
|
| 
       39 
40 
     | 
    
         
             
            - lib/tokenizers/models/word_level.rb
         
     | 
| 
       40 
41 
     | 
    
         
             
            - lib/tokenizers/models/word_piece.rb
         
     | 
| 
       41 
42 
     | 
    
         
             
            - lib/tokenizers/normalizers/bert_normalizer.rb
         
     | 
| 
      
 43 
     | 
    
         
            +
            - lib/tokenizers/normalizers/prepend.rb
         
     | 
| 
       42 
44 
     | 
    
         
             
            - lib/tokenizers/normalizers/strip.rb
         
     | 
| 
       43 
45 
     | 
    
         
             
            - lib/tokenizers/pre_tokenizers/byte_level.rb
         
     | 
| 
       44 
46 
     | 
    
         
             
            - lib/tokenizers/pre_tokenizers/digits.rb
         
     |