tokenizers 0.5.3-x64-mingw-ucrt

Sign up to get free protection for your applications and to get access to all the features.
Files changed (42) hide show
  1. checksums.yaml +7 -0
  2. data/CHANGELOG.md +107 -0
  3. data/Cargo.lock +898 -0
  4. data/Cargo.toml +6 -0
  5. data/LICENSE-THIRD-PARTY.txt +17427 -0
  6. data/LICENSE.txt +202 -0
  7. data/README.md +105 -0
  8. data/lib/tokenizers/3.1/tokenizers.so +0 -0
  9. data/lib/tokenizers/3.2/tokenizers.so +0 -0
  10. data/lib/tokenizers/3.3/tokenizers.so +0 -0
  11. data/lib/tokenizers/added_token.rb +7 -0
  12. data/lib/tokenizers/char_bpe_tokenizer.rb +22 -0
  13. data/lib/tokenizers/decoders/bpe_decoder.rb +9 -0
  14. data/lib/tokenizers/decoders/ctc.rb +9 -0
  15. data/lib/tokenizers/decoders/metaspace.rb +9 -0
  16. data/lib/tokenizers/decoders/strip.rb +9 -0
  17. data/lib/tokenizers/decoders/word_piece.rb +9 -0
  18. data/lib/tokenizers/encoding.rb +19 -0
  19. data/lib/tokenizers/from_pretrained.rb +125 -0
  20. data/lib/tokenizers/models/bpe.rb +9 -0
  21. data/lib/tokenizers/models/unigram.rb +9 -0
  22. data/lib/tokenizers/models/word_level.rb +13 -0
  23. data/lib/tokenizers/models/word_piece.rb +9 -0
  24. data/lib/tokenizers/normalizers/bert_normalizer.rb +9 -0
  25. data/lib/tokenizers/normalizers/prepend.rb +9 -0
  26. data/lib/tokenizers/normalizers/strip.rb +9 -0
  27. data/lib/tokenizers/pre_tokenizers/byte_level.rb +9 -0
  28. data/lib/tokenizers/pre_tokenizers/digits.rb +9 -0
  29. data/lib/tokenizers/pre_tokenizers/metaspace.rb +9 -0
  30. data/lib/tokenizers/pre_tokenizers/punctuation.rb +9 -0
  31. data/lib/tokenizers/pre_tokenizers/split.rb +9 -0
  32. data/lib/tokenizers/processors/byte_level.rb +9 -0
  33. data/lib/tokenizers/processors/roberta_processing.rb +9 -0
  34. data/lib/tokenizers/processors/template_processing.rb +9 -0
  35. data/lib/tokenizers/tokenizer.rb +45 -0
  36. data/lib/tokenizers/trainers/bpe_trainer.rb +9 -0
  37. data/lib/tokenizers/trainers/unigram_trainer.rb +26 -0
  38. data/lib/tokenizers/trainers/word_level_trainer.rb +9 -0
  39. data/lib/tokenizers/trainers/word_piece_trainer.rb +26 -0
  40. data/lib/tokenizers/version.rb +3 -0
  41. data/lib/tokenizers.rb +62 -0
  42. metadata +85 -0
data/Cargo.toml ADDED
@@ -0,0 +1,6 @@
1
+ [workspace]
2
+ members = ["ext/tokenizers"]
3
+ resolver = "2"
4
+
5
+ [profile.release]
6
+ strip = true