tokenizers 0.3.2-x86_64-linux-musl

Sign up for free protection for your applications and access to all features.
Files changed (40) hide show
  1. checksums.yaml +7 -0
  2. data/CHANGELOG.md +56 -0
  3. data/Cargo.lock +873 -0
  4. data/Cargo.toml +5 -0
  5. data/LICENSE-THIRD-PARTY.txt +17286 -0
  6. data/LICENSE.txt +202 -0
  7. data/README.md +69 -0
  8. data/lib/tokenizers/2.7/tokenizers.so +0 -0
  9. data/lib/tokenizers/3.0/tokenizers.so +0 -0
  10. data/lib/tokenizers/3.1/tokenizers.so +0 -0
  11. data/lib/tokenizers/3.2/tokenizers.so +0 -0
  12. data/lib/tokenizers/char_bpe_tokenizer.rb +22 -0
  13. data/lib/tokenizers/decoders/bpe_decoder.rb +9 -0
  14. data/lib/tokenizers/decoders/ctc.rb +9 -0
  15. data/lib/tokenizers/decoders/metaspace.rb +9 -0
  16. data/lib/tokenizers/decoders/word_piece.rb +9 -0
  17. data/lib/tokenizers/encoding.rb +19 -0
  18. data/lib/tokenizers/from_pretrained.rb +119 -0
  19. data/lib/tokenizers/models/bpe.rb +9 -0
  20. data/lib/tokenizers/models/unigram.rb +9 -0
  21. data/lib/tokenizers/models/word_level.rb +13 -0
  22. data/lib/tokenizers/models/word_piece.rb +9 -0
  23. data/lib/tokenizers/normalizers/bert_normalizer.rb +9 -0
  24. data/lib/tokenizers/normalizers/strip.rb +9 -0
  25. data/lib/tokenizers/pre_tokenizers/byte_level.rb +9 -0
  26. data/lib/tokenizers/pre_tokenizers/digits.rb +9 -0
  27. data/lib/tokenizers/pre_tokenizers/metaspace.rb +9 -0
  28. data/lib/tokenizers/pre_tokenizers/punctuation.rb +9 -0
  29. data/lib/tokenizers/pre_tokenizers/split.rb +9 -0
  30. data/lib/tokenizers/processors/byte_level.rb +9 -0
  31. data/lib/tokenizers/processors/roberta_processing.rb +9 -0
  32. data/lib/tokenizers/processors/template_processing.rb +9 -0
  33. data/lib/tokenizers/tokenizer.rb +45 -0
  34. data/lib/tokenizers/trainers/bpe_trainer.rb +9 -0
  35. data/lib/tokenizers/trainers/unigram_trainer.rb +26 -0
  36. data/lib/tokenizers/trainers/word_level_trainer.rb +9 -0
  37. data/lib/tokenizers/trainers/word_piece_trainer.rb +26 -0
  38. data/lib/tokenizers/version.rb +3 -0
  39. data/lib/tokenizers.rb +59 -0
  40. metadata +83 -0
data/Cargo.toml ADDED
@@ -0,0 +1,5 @@
1
+ [workspace]
2
+ members = ["ext/tokenizers"]
3
+
4
+ [profile.release]
5
+ strip = true