tokenizers 0.3.0-x86_64-linux → 0.3.2-x86_64-linux
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +12 -4
- data/Cargo.lock +13 -8
- data/LICENSE-THIRD-PARTY.txt +3 -29
- data/lib/tokenizers/2.7/tokenizers.so +0 -0
- data/lib/tokenizers/3.0/tokenizers.so +0 -0
- data/lib/tokenizers/3.1/tokenizers.so +0 -0
- data/lib/tokenizers/3.2/tokenizers.so +0 -0
- data/lib/tokenizers/from_pretrained.rb +1 -1
- data/lib/tokenizers/version.rb +1 -1
- metadata +2 -2
    
        checksums.yaml
    CHANGED
    
    | @@ -1,7 +1,7 @@ | |
| 1 1 | 
             
            ---
         | 
| 2 2 | 
             
            SHA256:
         | 
| 3 | 
            -
              metadata.gz:  | 
| 4 | 
            -
              data.tar.gz:  | 
| 3 | 
            +
              metadata.gz: 5c255ccee12a9f089e4c55c80c8874d2eb236bd8e2d2bc5b07a56b69ff553415
         | 
| 4 | 
            +
              data.tar.gz: fe58ce9b34f3220aec3ae2f8090caf0cc519b3dacc187328a7b364b38f3d430f
         | 
| 5 5 | 
             
            SHA512:
         | 
| 6 | 
            -
              metadata.gz:  | 
| 7 | 
            -
              data.tar.gz:  | 
| 6 | 
            +
              metadata.gz: 6745dcdda9eb9aecbf7fc31755107de76110ab6ed9d1ffcedbc63e6e034c7d4276ba6de959763ba4463dcbb2ec8cd1783680f558a733977c6799cd427064b6af
         | 
| 7 | 
            +
              data.tar.gz: 959f49d956aafee7556a38b724b812b7c49ff2e22f55b0770ff069bafd7dcf63419d6dc33b65cef71da5b45b020534d6905d6770e95ce6142b9f57e59c3582bc
         | 
    
        data/CHANGELOG.md
    CHANGED
    
    | @@ -1,4 +1,12 @@ | |
| 1 | 
            -
            ## 0.3. | 
| 1 | 
            +
            ## 0.3.2 (2023-03-06)
         | 
| 2 | 
            +
             | 
| 3 | 
            +
            - Added precompiled gem for Linux x86-64 MUSL
         | 
| 4 | 
            +
             | 
| 5 | 
            +
            ## 0.3.1 (2023-02-08)
         | 
| 6 | 
            +
             | 
| 7 | 
            +
            - Fixed error with Ruby 2.7
         | 
| 8 | 
            +
             | 
| 9 | 
            +
            ## 0.3.0 (2023-02-07)
         | 
| 2 10 |  | 
| 3 11 | 
             
            - Added support for training tokenizers
         | 
| 4 12 | 
             
            - Added more methods to `Tokenizer`
         | 
| @@ -7,20 +15,20 @@ | |
| 7 15 | 
             
            - Changed `encode` method to include special tokens by default
         | 
| 8 16 | 
             
            - Changed how offsets are calculated for strings with multibyte characters
         | 
| 9 17 |  | 
| 10 | 
            -
            ## 0.2.3 ( | 
| 18 | 
            +
            ## 0.2.3 (2023-01-22)
         | 
| 11 19 |  | 
| 12 20 | 
             
            - Added `add_special_tokens` option to `encode` method
         | 
| 13 21 | 
             
            - Added warning about `encode` method including special tokens by default in 0.3.0
         | 
| 14 22 | 
             
            - Added more methods to `Encoding`
         | 
| 15 23 | 
             
            - Fixed error with precompiled gem on Mac ARM
         | 
| 16 24 |  | 
| 17 | 
            -
            ## 0.2.2 ( | 
| 25 | 
            +
            ## 0.2.2 (2023-01-15)
         | 
| 18 26 |  | 
| 19 27 | 
             
            - Added precompiled gem for Linux ARM
         | 
| 20 28 | 
             
            - Added `from_file` method
         | 
| 21 29 | 
             
            - Fixed error with precompiled gem on Linux x86-64
         | 
| 22 30 |  | 
| 23 | 
            -
            ## 0.2.1 ( | 
| 31 | 
            +
            ## 0.2.1 (2023-01-12)
         | 
| 24 32 |  | 
| 25 33 | 
             
            - Added support for Ruby 3.2
         | 
| 26 34 |  | 
    
        data/Cargo.lock
    CHANGED
    
    | @@ -353,7 +353,8 @@ checksum = "58093314a45e00c77d5c508f76e77c3396afbbc0d01506e7fae47b018bac2b1d" | |
| 353 353 | 
             
            [[package]]
         | 
| 354 354 | 
             
            name = "magnus"
         | 
| 355 355 | 
             
            version = "0.5.0"
         | 
| 356 | 
            -
            source = " | 
| 356 | 
            +
            source = "registry+https://github.com/rust-lang/crates.io-index"
         | 
| 357 | 
            +
            checksum = "af37419a942477f606d227d0e6e92f3b68458bfc68fec3bc2629df6a2c1ccdf9"
         | 
| 357 358 | 
             
            dependencies = [
         | 
| 358 359 | 
             
             "magnus-macros",
         | 
| 359 360 | 
             
             "rb-sys",
         | 
| @@ -362,8 +363,9 @@ dependencies = [ | |
| 362 363 |  | 
| 363 364 | 
             
            [[package]]
         | 
| 364 365 | 
             
            name = "magnus-macros"
         | 
| 365 | 
            -
            version = "0. | 
| 366 | 
            -
            source = " | 
| 366 | 
            +
            version = "0.4.0"
         | 
| 367 | 
            +
            source = "registry+https://github.com/rust-lang/crates.io-index"
         | 
| 368 | 
            +
            checksum = "85aa71c9891b2732ff1157e1860a1ee578459fd25811fd3d72cc6e32b3fbdfea"
         | 
| 367 369 | 
             
            dependencies = [
         | 
| 368 370 | 
             
             "proc-macro2",
         | 
| 369 371 | 
             
             "quote",
         | 
| @@ -552,22 +554,25 @@ dependencies = [ | |
| 552 554 |  | 
| 553 555 | 
             
            [[package]]
         | 
| 554 556 | 
             
            name = "rb-sys"
         | 
| 555 | 
            -
            version = "0.9. | 
| 557 | 
            +
            version = "0.9.65"
         | 
| 556 558 | 
             
            source = "registry+https://github.com/rust-lang/crates.io-index"
         | 
| 557 | 
            -
            checksum = " | 
| 559 | 
            +
            checksum = "e8fe617bad8e88fd7e5d6f432e35f09e5f94144dfb8e8ee4adde82fb920dc59b"
         | 
| 558 560 | 
             
            dependencies = [
         | 
| 559 561 | 
             
             "rb-sys-build",
         | 
| 560 562 | 
             
            ]
         | 
| 561 563 |  | 
| 562 564 | 
             
            [[package]]
         | 
| 563 565 | 
             
            name = "rb-sys-build"
         | 
| 564 | 
            -
            version = "0.9. | 
| 566 | 
            +
            version = "0.9.65"
         | 
| 565 567 | 
             
            source = "registry+https://github.com/rust-lang/crates.io-index"
         | 
| 566 | 
            -
            checksum = " | 
| 568 | 
            +
            checksum = "007e63597f91c711cbb299e60fecbdb6f5ad4a066d6a20c81943893f1584c895"
         | 
| 567 569 | 
             
            dependencies = [
         | 
| 568 570 | 
             
             "bindgen",
         | 
| 571 | 
            +
             "lazy_static",
         | 
| 572 | 
            +
             "quote",
         | 
| 569 573 | 
             
             "regex",
         | 
| 570 574 | 
             
             "shell-words",
         | 
| 575 | 
            +
             "syn",
         | 
| 571 576 | 
             
            ]
         | 
| 572 577 |  | 
| 573 578 | 
             
            [[package]]
         | 
| @@ -711,7 +716,7 @@ dependencies = [ | |
| 711 716 |  | 
| 712 717 | 
             
            [[package]]
         | 
| 713 718 | 
             
            name = "tokenizers"
         | 
| 714 | 
            -
            version = "0. | 
| 719 | 
            +
            version = "0.3.1"
         | 
| 715 720 | 
             
            dependencies = [
         | 
| 716 721 | 
             
             "magnus",
         | 
| 717 722 | 
             
             "onig",
         | 
    
        data/LICENSE-THIRD-PARTY.txt
    CHANGED
    
    | @@ -154,7 +154,7 @@ magnus v0.5.0 | |
| 154 154 | 
             
            https://github.com/matsadler/magnus
         | 
| 155 155 | 
             
            MIT
         | 
| 156 156 |  | 
| 157 | 
            -
            magnus-macros v0. | 
| 157 | 
            +
            magnus-macros v0.4.0
         | 
| 158 158 | 
             
            https://github.com/matsadler/magnus
         | 
| 159 159 | 
             
            MIT
         | 
| 160 160 |  | 
| @@ -242,11 +242,11 @@ rayon-core v1.10.2 | |
| 242 242 | 
             
            https://github.com/rayon-rs/rayon
         | 
| 243 243 | 
             
            MIT OR Apache-2.0
         | 
| 244 244 |  | 
| 245 | 
            -
            rb-sys v0.9. | 
| 245 | 
            +
            rb-sys v0.9.65
         | 
| 246 246 | 
             
            https://github.com/oxidize-rb/rb-sys
         | 
| 247 247 | 
             
            MIT OR Apache-2.0
         | 
| 248 248 |  | 
| 249 | 
            -
            rb-sys-build v0.9. | 
| 249 | 
            +
            rb-sys-build v0.9.65
         | 
| 250 250 | 
             
            https://github.com/oxidize-rb/rb-sys
         | 
| 251 251 | 
             
            MIT OR Apache-2.0
         | 
| 252 252 |  | 
| @@ -7795,32 +7795,6 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |
| 7795 7795 | 
             
            OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
         | 
| 7796 7796 | 
             
            SOFTWARE.
         | 
| 7797 7797 |  | 
| 7798 | 
            -
            ================================================================================
         | 
| 7799 | 
            -
            magnus magnus-macros/LICENSE
         | 
| 7800 | 
            -
            ================================================================================
         | 
| 7801 | 
            -
             | 
| 7802 | 
            -
            MIT License
         | 
| 7803 | 
            -
             | 
| 7804 | 
            -
            Copyright (c) 2022, 2021 Matthew Sadler
         | 
| 7805 | 
            -
             | 
| 7806 | 
            -
            Permission is hereby granted, free of charge, to any person obtaining a copy
         | 
| 7807 | 
            -
            of this software and associated documentation files (the "Software"), to deal
         | 
| 7808 | 
            -
            in the Software without restriction, including without limitation the rights
         | 
| 7809 | 
            -
            to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
         | 
| 7810 | 
            -
            copies of the Software, and to permit persons to whom the Software is
         | 
| 7811 | 
            -
            furnished to do so, subject to the following conditions:
         | 
| 7812 | 
            -
             | 
| 7813 | 
            -
            The above copyright notice and this permission notice shall be included in all
         | 
| 7814 | 
            -
            copies or substantial portions of the Software.
         | 
| 7815 | 
            -
             | 
| 7816 | 
            -
            THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
         | 
| 7817 | 
            -
            IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
         | 
| 7818 | 
            -
            FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
         | 
| 7819 | 
            -
            AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
         | 
| 7820 | 
            -
            LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
         | 
| 7821 | 
            -
            OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
         | 
| 7822 | 
            -
            SOFTWARE.
         | 
| 7823 | 
            -
             | 
| 7824 7798 | 
             
            ================================================================================
         | 
| 7825 7799 | 
             
            magnus-macros LICENSE
         | 
| 7826 7800 | 
             
            ================================================================================
         | 
| Binary file | 
| Binary file | 
| Binary file | 
| Binary file | 
| @@ -44,7 +44,7 @@ module Tokenizers | |
| 44 44 | 
             
                def cached_path(cache_dir, url, options)
         | 
| 45 45 | 
             
                  fsum = Digest::SHA256.hexdigest(url)
         | 
| 46 46 | 
             
                  meta_paths = Dir[File.join(cache_dir, "#{fsum}.*.meta")]
         | 
| 47 | 
            -
                  meta = meta_paths.map { |f| JSON. | 
| 47 | 
            +
                  meta = meta_paths.map { |f| JSON.parse(File.read(f)) }.max_by { |m| m["creation_time"] }
         | 
| 48 48 | 
             
                  etag = meta["etag"] if meta
         | 
| 49 49 |  | 
| 50 50 | 
             
                  if etag
         | 
    
        data/lib/tokenizers/version.rb
    CHANGED
    
    
    
        metadata
    CHANGED
    
    | @@ -1,14 +1,14 @@ | |
| 1 1 | 
             
            --- !ruby/object:Gem::Specification
         | 
| 2 2 | 
             
            name: tokenizers
         | 
| 3 3 | 
             
            version: !ruby/object:Gem::Version
         | 
| 4 | 
            -
              version: 0.3. | 
| 4 | 
            +
              version: 0.3.2
         | 
| 5 5 | 
             
            platform: x86_64-linux
         | 
| 6 6 | 
             
            authors:
         | 
| 7 7 | 
             
            - Andrew Kane
         | 
| 8 8 | 
             
            autorequire: 
         | 
| 9 9 | 
             
            bindir: bin
         | 
| 10 10 | 
             
            cert_chain: []
         | 
| 11 | 
            -
            date: 2023- | 
| 11 | 
            +
            date: 2023-03-07 00:00:00.000000000 Z
         | 
| 12 12 | 
             
            dependencies: []
         | 
| 13 13 | 
             
            description: 
         | 
| 14 14 | 
             
            email: andrew@ankane.org
         |