tokenizers 0.2.3 → 0.3.0

Files changed (46)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +9 -0
  3. data/Cargo.lock +32 -73
  4. data/README.md +4 -0
  5. data/ext/tokenizers/Cargo.toml +3 -1
  6. data/ext/tokenizers/src/decoders.rs +275 -6
  7. data/ext/tokenizers/src/encoding.rs +3 -2
  8. data/ext/tokenizers/src/error.rs +2 -2
  9. data/ext/tokenizers/src/lib.rs +64 -17
  10. data/ext/tokenizers/src/models.rs +372 -11
  11. data/ext/tokenizers/src/normalizers.rs +435 -7
  12. data/ext/tokenizers/src/pre_tokenizers.rs +470 -6
  13. data/ext/tokenizers/src/processors.rs +210 -0
  14. data/ext/tokenizers/src/tokenizer.rs +437 -23
  15. data/ext/tokenizers/src/trainers.rs +749 -0
  16. data/ext/tokenizers/src/utils/mod.rs +5 -0
  17. data/ext/tokenizers/src/utils/normalization.rs +85 -0
  18. data/ext/tokenizers/src/utils/regex.rs +22 -0
  19. data/lib/tokenizers/char_bpe_tokenizer.rb +9 -6
  20. data/lib/tokenizers/decoders/bpe_decoder.rb +9 -0
  21. data/lib/tokenizers/decoders/ctc.rb +9 -0
  22. data/lib/tokenizers/decoders/metaspace.rb +9 -0
  23. data/lib/tokenizers/decoders/word_piece.rb +9 -0
  24. data/lib/tokenizers/from_pretrained.rb +1 -1
  25. data/lib/tokenizers/models/bpe.rb +9 -0
  26. data/lib/tokenizers/models/unigram.rb +9 -0
  27. data/lib/tokenizers/models/word_level.rb +13 -0
  28. data/lib/tokenizers/models/word_piece.rb +9 -0
  29. data/lib/tokenizers/normalizers/bert_normalizer.rb +9 -0
  30. data/lib/tokenizers/normalizers/strip.rb +9 -0
  31. data/lib/tokenizers/pre_tokenizers/byte_level.rb +9 -0
  32. data/lib/tokenizers/pre_tokenizers/digits.rb +9 -0
  33. data/lib/tokenizers/pre_tokenizers/metaspace.rb +9 -0
  34. data/lib/tokenizers/pre_tokenizers/punctuation.rb +9 -0
  35. data/lib/tokenizers/pre_tokenizers/split.rb +9 -0
  36. data/lib/tokenizers/processors/byte_level.rb +9 -0
  37. data/lib/tokenizers/processors/roberta_processing.rb +9 -0
  38. data/lib/tokenizers/processors/template_processing.rb +9 -0
  39. data/lib/tokenizers/tokenizer.rb +40 -7
  40. data/lib/tokenizers/trainers/bpe_trainer.rb +9 -0
  41. data/lib/tokenizers/trainers/unigram_trainer.rb +26 -0
  42. data/lib/tokenizers/trainers/word_level_trainer.rb +9 -0
  43. data/lib/tokenizers/trainers/word_piece_trainer.rb +26 -0
  44. data/lib/tokenizers/version.rb +1 -1
  45. data/lib/tokenizers.rb +42 -2
  46. metadata +30 -3
data/ext/tokenizers/src/utils/mod.rs ADDED
@@ -0,0 +1,5 @@
+ mod normalization;
+ mod regex;
+
+ pub use normalization::*;
+ pub use regex::*;
data/ext/tokenizers/src/utils/normalization.rs ADDED
@@ -0,0 +1,85 @@
+ use super::regex::{regex, RbRegex};
+ use crate::RbResult;
+ use magnus::{exception, Error, TryConvert, Value};
+ use tk::normalizer::SplitDelimiterBehavior;
+ use tk::pattern::Pattern;
+
+ #[derive(Clone)]
+ pub enum RbPattern<'p> {
+     Str(String),
+     Regex(&'p RbRegex),
+ }
+
+ impl TryConvert for RbPattern<'_> {
+     fn try_convert(obj: Value) -> RbResult<Self> {
+         if obj.is_kind_of(regex()) {
+             Ok(RbPattern::Regex(obj.try_convert()?))
+         } else {
+             Ok(RbPattern::Str(obj.try_convert()?))
+         }
+     }
+ }
+
+ impl Pattern for RbPattern<'_> {
+     fn find_matches(&self, inside: &str) -> tk::Result<Vec<(tk::Offsets, bool)>> {
+         match self {
+             RbPattern::Str(s) => {
+                 let mut chars = s.chars();
+                 if let (Some(c), None) = (chars.next(), chars.next()) {
+                     c.find_matches(inside)
+                 } else {
+                     s.find_matches(inside)
+                 }
+             }
+             RbPattern::Regex(_r) => {
+                 todo!()
+             }
+         }
+     }
+ }
+
+ impl From<RbPattern<'_>> for tk::normalizers::replace::ReplacePattern {
+     fn from(pattern: RbPattern<'_>) -> Self {
+         match pattern {
+             RbPattern::Str(s) => Self::String(s),
+             RbPattern::Regex(_r) => todo!(),
+         }
+     }
+ }
+
+ impl From<RbPattern<'_>> for tk::pre_tokenizers::split::SplitPattern {
+     fn from(pattern: RbPattern<'_>) -> Self {
+         match pattern {
+             RbPattern::Str(s) => Self::String(s),
+             RbPattern::Regex(_r) => todo!(),
+         }
+     }
+ }
+
+ #[derive(Clone)]
+ pub struct RbSplitDelimiterBehavior(pub SplitDelimiterBehavior);
+
+ impl TryConvert for RbSplitDelimiterBehavior {
+     fn try_convert(obj: Value) -> RbResult<Self> {
+         let s = obj.try_convert::<String>()?;
+
+         Ok(Self(match s.as_str() {
+             "removed" => Ok(SplitDelimiterBehavior::Removed),
+             "isolated" => Ok(SplitDelimiterBehavior::Isolated),
+             "merged_with_previous" => Ok(SplitDelimiterBehavior::MergedWithPrevious),
+             "merged_with_next" => Ok(SplitDelimiterBehavior::MergedWithNext),
+             "contiguous" => Ok(SplitDelimiterBehavior::Contiguous),
+             _ => Err(Error::new(
+                 exception::arg_error(),
+                 "Wrong value for SplitDelimiterBehavior, expected one of: \
+                 `removed, isolated, merged_with_previous, merged_with_next, contiguous`",
+             )),
+         }?))
+     }
+ }
+
+ impl From<RbSplitDelimiterBehavior> for SplitDelimiterBehavior {
+     fn from(v: RbSplitDelimiterBehavior) -> Self {
+         v.0
+     }
+ }
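
The RbSplitDelimiterBehavior converter above maps plain Ruby strings onto the delimiter-behavior enum and raises ArgumentError for anything else. A minimal sketch of how this surfaces in Ruby, assuming the Punctuation pre-tokenizer binding (added later in this diff) routes its behavior argument through this converter:

    require "tokenizers"

    # Accepted behavior strings: "removed", "isolated", "merged_with_previous",
    # "merged_with_next", "contiguous".
    pre = Tokenizers::PreTokenizers::Punctuation.new(behavior: "isolated")

    begin
      Tokenizers::PreTokenizers::Punctuation.new(behavior: "bogus")
    rescue ArgumentError => e
      # the extension lists the accepted values in the error message
    end
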
data/ext/tokenizers/src/utils/regex.rs ADDED
@@ -0,0 +1,22 @@
+ use onig::Regex;
+ use magnus::{exception, memoize, Error, Module, RClass};
+ use crate::{module, RbResult};
+
+ #[magnus::wrap(class = "Tokenizers::Regex")]
+ pub struct RbRegex {
+     pub inner: Regex,
+     pub pattern: String,
+ }
+
+ impl RbRegex {
+     pub fn new(s: String) -> RbResult<Self> {
+         Ok(Self {
+             inner: Regex::new(&s).map_err(|e| Error::new(exception::runtime_error(), e.description().to_owned()))?,
+             pattern: s,
+         })
+     }
+ }
+
+ pub fn regex() -> RClass {
+     *memoize!(RClass: module().const_get("Regex").unwrap())
+ }
data/lib/tokenizers/char_bpe_tokenizer.rb CHANGED
@@ -1,11 +1,14 @@
  module Tokenizers
    class CharBPETokenizer
-     def initialize(vocab, merges)
-       @tokenizer = Tokenizer.new(BPE.new(vocab, merges))
-       @tokenizer.add_special_tokens(["<unk>"])
-       @tokenizer.normalizer = BertNormalizer.new
-       @tokenizer.pre_tokenizer = BertPreTokenizer.new
-       @tokenizer.decoder = BPEDecoder.new
+     def initialize(vocab, merges, unk_token: "<unk>", suffix: "</w>")
+       @tokenizer =
+         Tokenizer.new(
+           Models::BPE._from_file(vocab, merges, {unk_token: unk_token, end_of_word_suffix: suffix})
+         )
+       @tokenizer.add_special_tokens([unk_token])
+       @tokenizer.normalizer = Normalizers::BertNormalizer.new
+       @tokenizer.pre_tokenizer = PreTokenizers::BertPreTokenizer.new
+       @tokenizer.decoder = Decoders::BPEDecoder.new
      end

      def encode(text, **options)
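
With the new keyword arguments, the character-level BPE tokenizer can be configured with a custom unknown token and end-of-word suffix. A short usage sketch; the vocab and merges paths are placeholders for a trained char-level BPE model:

    tokenizer = Tokenizers::CharBPETokenizer.new(
      "vocab.json", "merges.txt",          # placeholder paths
      unk_token: "<unk>", suffix: "</w>"
    )
    encoded = tokenizer.encode("Hello world")
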
data/lib/tokenizers/decoders/bpe_decoder.rb ADDED
@@ -0,0 +1,9 @@
+ module Tokenizers
+   module Decoders
+     class BPEDecoder
+       def self.new(suffix: "</w>")
+         _new(suffix)
+       end
+     end
+   end
+ end
data/lib/tokenizers/decoders/ctc.rb ADDED
@@ -0,0 +1,9 @@
+ module Tokenizers
+   module Decoders
+     class CTC
+       def self.new(pad_token: "<pad>", word_delimiter_token: "|", cleanup: true)
+         _new(pad_token, word_delimiter_token, cleanup)
+       end
+     end
+   end
+ end
data/lib/tokenizers/decoders/metaspace.rb ADDED
@@ -0,0 +1,9 @@
+ module Tokenizers
+   module Decoders
+     class Metaspace
+       def self.new(replacement: "\u2581", add_prefix_space: true)
+         _new(replacement, add_prefix_space)
+       end
+     end
+   end
+ end
data/lib/tokenizers/decoders/word_piece.rb ADDED
@@ -0,0 +1,9 @@
+ module Tokenizers
+   module Decoders
+     class WordPiece
+       def self.new(prefix: '##', cleanup: true)
+         _new(prefix, cleanup)
+       end
+     end
+   end
+ end
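
Each decoder wrapper simply collects keyword defaults and forwards them to the native _new. A minimal sketch of attaching one, assuming tokenizer is an existing Tokenizers::Tokenizer:

    tokenizer.decoder = Tokenizers::Decoders::WordPiece.new(prefix: "##", cleanup: true)
    # or, for metaspace-based models:
    tokenizer.decoder = Tokenizers::Decoders::Metaspace.new(replacement: "\u2581")
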
data/lib/tokenizers/from_pretrained.rb CHANGED
@@ -57,7 +57,7 @@ module Tokenizers

      tempfile =
        begin
-         URI.open(url, options)
+         URI.parse(url).open(options)
        rescue OpenURI::HTTPError => e
          if e.message == "304 Not Modified"
            return resource_path
1
+ module Tokenizers
2
+ module Models
3
+ class BPE
4
+ def self.new(vocab: nil, merges: nil, **kwargs)
5
+ _new(vocab, merges, kwargs)
6
+ end
7
+ end
8
+ end
9
+ end
data/lib/tokenizers/models/unigram.rb ADDED
@@ -0,0 +1,9 @@
+ module Tokenizers
+   module Models
+     class Unigram
+       def self.new(vocab: nil, unk_id: nil)
+         _new(vocab, unk_id)
+       end
+     end
+   end
+ end
data/lib/tokenizers/models/word_level.rb ADDED
@@ -0,0 +1,13 @@
+ module Tokenizers
+   module Models
+     class WordLevel
+       def self.new(vocab: nil, unk_token: nil)
+         _new(vocab, unk_token)
+       end
+
+       def self.from_file(vocab, unk_token: nil)
+         _from_file(vocab, unk_token)
+       end
+     end
+   end
+ end
data/lib/tokenizers/models/word_piece.rb ADDED
@@ -0,0 +1,9 @@
+ module Tokenizers
+   module Models
+     class WordPiece
+       def self.new(vocab: nil, **kwargs)
+         _new(vocab, kwargs)
+       end
+     end
+   end
+ end
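
The model wrappers follow the same pattern: keyword arguments pass straight through to the extension, and vocab/merges can be omitted to build an empty, trainable model. A hedged sketch, assuming the BPE binding accepts unk_token like the upstream tokenizers library:

    model = Tokenizers::Models::BPE.new(unk_token: "[UNK]")   # empty model, suitable for training
    tokenizer = Tokenizers::Tokenizer.new(model)
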
data/lib/tokenizers/normalizers/bert_normalizer.rb ADDED
@@ -0,0 +1,9 @@
+ module Tokenizers
+   module Normalizers
+     class BertNormalizer
+       def self.new(clean_text: true, handle_chinese_chars: true, strip_accents: nil, lowercase: true)
+         _new(clean_text, handle_chinese_chars, strip_accents, lowercase)
+       end
+     end
+   end
+ end
data/lib/tokenizers/normalizers/strip.rb ADDED
@@ -0,0 +1,9 @@
+ module Tokenizers
+   module Normalizers
+     class Strip
+       def self.new(left: true, right: true)
+         _new(left, right)
+       end
+     end
+   end
+ end
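
Normalizers attach through the normalizer= setter, just as the updated CharBPETokenizer above does. A minimal sketch, assuming tokenizer is an existing Tokenizers::Tokenizer:

    tokenizer.normalizer = Tokenizers::Normalizers::BertNormalizer.new(
      lowercase: true,
      strip_accents: nil   # nil defers accent stripping to the lowercase setting, as upstream
    )
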
data/lib/tokenizers/pre_tokenizers/byte_level.rb ADDED
@@ -0,0 +1,9 @@
+ module Tokenizers
+   module PreTokenizers
+     class ByteLevel
+       def self.new(add_prefix_space: true, use_regex: true)
+         _new(add_prefix_space, use_regex)
+       end
+     end
+   end
+ end
data/lib/tokenizers/pre_tokenizers/digits.rb ADDED
@@ -0,0 +1,9 @@
+ module Tokenizers
+   module PreTokenizers
+     class Digits
+       def self.new(individual_digits: false)
+         _new(individual_digits)
+       end
+     end
+   end
+ end
data/lib/tokenizers/pre_tokenizers/metaspace.rb ADDED
@@ -0,0 +1,9 @@
+ module Tokenizers
+   module PreTokenizers
+     class Metaspace
+       def self.new(replacement: "\u2581", add_prefix_space: true)
+         _new(replacement, add_prefix_space)
+       end
+     end
+   end
+ end
data/lib/tokenizers/pre_tokenizers/punctuation.rb ADDED
@@ -0,0 +1,9 @@
+ module Tokenizers
+   module PreTokenizers
+     class Punctuation
+       def self.new(behavior: "isolated")
+         _new(behavior)
+       end
+     end
+   end
+ end
data/lib/tokenizers/pre_tokenizers/split.rb ADDED
@@ -0,0 +1,9 @@
+ module Tokenizers
+   module PreTokenizers
+     class Split
+       def self.new(pattern, behavior, invert: false)
+         _new(pattern, behavior, invert)
+       end
+     end
+   end
+ end
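
Split takes a pattern plus one of the delimiter-behavior strings handled by RbSplitDelimiterBehavior above; in this release only string patterns are wired through (the Tokenizers::Regex branch is still todo!()). A minimal sketch, assuming tokenizer is an existing Tokenizers::Tokenizer:

    # split on a literal hyphen, keeping the delimiter as its own token
    tokenizer.pre_tokenizer = Tokenizers::PreTokenizers::Split.new("-", "isolated", invert: false)

    # or split each digit into its own token
    tokenizer.pre_tokenizer = Tokenizers::PreTokenizers::Digits.new(individual_digits: true)
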
data/lib/tokenizers/processors/byte_level.rb ADDED
@@ -0,0 +1,9 @@
+ module Tokenizers
+   module Processors
+     class ByteLevel
+       def self.new(trim_offsets: true)
+         _new(trim_offsets)
+       end
+     end
+   end
+ end
data/lib/tokenizers/processors/roberta_processing.rb ADDED
@@ -0,0 +1,9 @@
+ module Tokenizers
+   module Processors
+     class RobertaProcessing
+       def self.new(sep, cls, trim_offsets: true, add_prefix_space: true)
+         _new(sep, cls, trim_offsets, add_prefix_space)
+       end
+     end
+   end
+ end
data/lib/tokenizers/processors/template_processing.rb ADDED
@@ -0,0 +1,9 @@
+ module Tokenizers
+   module Processors
+     class TemplateProcessing
+       def self.new(single: nil, pair: nil, special_tokens: nil)
+         _new(single, pair, special_tokens)
+       end
+     end
+   end
+ end
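
Post-processors are constructed the same way. A hedged sketch of TemplateProcessing, assuming the tokenizer exposes a post_processor= setter mirroring the other component setters (not shown in this diff) and that the template syntax and special_tokens pair format follow the upstream tokenizers library; the token ids are illustrative:

    tokenizer.post_processor = Tokenizers::Processors::TemplateProcessing.new(
      single: "[CLS] $A [SEP]",
      pair: "[CLS] $A [SEP] $B:1 [SEP]:1",
      special_tokens: [["[CLS]", 101], ["[SEP]", 102]]   # assumed [token, id] pairs
    )
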
data/lib/tokenizers/tokenizer.rb CHANGED
@@ -1,12 +1,45 @@
  module Tokenizers
    class Tokenizer
-     # TODO change add_special_tokens default to true in 0.3.0
-     def encode(sequence, add_special_tokens: nil)
-       if add_special_tokens.nil?
-         warn "[tokenizers] add_special_tokens will default to true in 0.3.0. Pass add_special_tokens: true/false to silence this warning."
-         add_special_tokens = false
-       end
-       _encode(sequence, add_special_tokens)
+     extend FromPretrained
+
+     def to_s(pretty: false)
+       _to_s(pretty)
+     end
+
+     def save(path, pretty: false)
+       _save(path, pretty)
+     end
+
+     def encode(sequence, pair = nil, is_pretokenized: false, add_special_tokens: true)
+       _encode(sequence, pair, is_pretokenized, add_special_tokens)
+     end
+
+     def encode_batch(input, is_pretokenized: false, add_special_tokens: true)
+       _encode_batch(input, is_pretokenized, add_special_tokens)
+     end
+
+     def decode(ids, skip_special_tokens: true)
+       _decode(ids, skip_special_tokens)
+     end
+
+     def decode_batch(sequences, skip_special_tokens: true)
+       _decode_batch(sequences, skip_special_tokens)
+     end
+
+     def enable_padding(**options)
+       _enable_padding(options)
+     end
+
+     def enable_truncation(max_length, **options)
+       _enable_truncation(max_length, options)
+     end
+
+     def vocab(with_added_tokens: true)
+       _vocab(with_added_tokens)
+     end
+
+     def vocab_size(with_added_tokens: true)
+       _vocab_size(with_added_tokens)
      end
    end
  end
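
The Tokenizer wrapper now exposes the full encode/decode surface with keyword defaults, and add_special_tokens defaults to true (the 0.2.x deprecation warning is gone). A short usage sketch; the padding/truncation option names are assumed to follow the upstream tokenizers API:

    tokenizer = Tokenizers::Tokenizer.from_pretrained("bert-base-cased")

    encoded = tokenizer.encode("Hello there!")
    encoded.ids                               # includes special token ids by default
    encoded.tokens

    pair  = tokenizer.encode("How are you?", "I'm fine, thank you.")
    batch = tokenizer.encode_batch(["Hello there!", "How are you?"])

    tokenizer.decode(encoded.ids)             # skip_special_tokens: true by default
    tokenizer.enable_truncation(128)
    tokenizer.enable_padding(length: 128)     # options forwarded to the extension as a hash
    tokenizer.vocab_size                      # with_added_tokens: true by default
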
data/lib/tokenizers/trainers/bpe_trainer.rb ADDED
@@ -0,0 +1,9 @@
+ module Tokenizers
+   module Trainers
+     class BpeTrainer
+       def self.new(**options)
+         _new(options)
+       end
+     end
+   end
+ end
data/lib/tokenizers/trainers/unigram_trainer.rb ADDED
@@ -0,0 +1,26 @@
+ module Tokenizers
+   module Trainers
+     class UnigramTrainer
+       def self.new(vocab_size: 8000,
+                    show_progress: true,
+                    special_tokens: [],
+                    initial_alphabet: [],
+                    shrinking_factor: 0.75,
+                    unk_token: nil,
+                    max_piece_length: 16,
+                    n_sub_iterations: 2)
+
+         _new({
+           vocab_size: vocab_size,
+           show_progress: show_progress,
+           special_tokens: special_tokens,
+           initial_alphabet: initial_alphabet,
+           shrinking_factor: shrinking_factor,
+           unk_token: unk_token,
+           max_piece_length: max_piece_length,
+           n_sub_iterations: n_sub_iterations
+         })
+       end
+     end
+   end
+ end
data/lib/tokenizers/trainers/word_level_trainer.rb ADDED
@@ -0,0 +1,9 @@
+ module Tokenizers
+   module Trainers
+     class WordLevelTrainer
+       def self.new(**options)
+         _new(options)
+       end
+     end
+   end
+ end
data/lib/tokenizers/trainers/word_piece_trainer.rb ADDED
@@ -0,0 +1,26 @@
+ module Tokenizers
+   module Trainers
+     class WordPieceTrainer
+       def self.new(vocab_size: 30000,
+                    min_frequency: 0,
+                    show_progress: true,
+                    special_tokens: [],
+                    limit_alphabet: nil,
+                    initial_alphabet: [],
+                    continuing_subword_prefix: "##",
+                    end_of_word_suffix: nil)
+
+         _new({
+           vocab_size: vocab_size,
+           min_frequency: min_frequency,
+           show_progress: show_progress,
+           special_tokens: special_tokens,
+           limit_alphabet: limit_alphabet,
+           initial_alphabet: initial_alphabet,
+           continuing_subword_prefix: continuing_subword_prefix,
+           end_of_word_suffix: end_of_word_suffix
+         })
+       end
+     end
+   end
+ end
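
Trainers collect their options into a hash for the extension. A hedged sketch of training a BPE tokenizer from text files, assuming the native Tokenizer#train method is exposed by the Rust extension (it is defined in tokenizer.rs/trainers.rs rather than in the Ruby wrappers shown here); the corpus paths are placeholders:

    tokenizer = Tokenizers::Tokenizer.new(Tokenizers::Models::BPE.new(unk_token: "[UNK]"))
    tokenizer.pre_tokenizer = Tokenizers::PreTokenizers::ByteLevel.new

    trainer = Tokenizers::Trainers::BpeTrainer.new(
      vocab_size: 20_000,
      special_tokens: ["[UNK]", "[CLS]", "[SEP]", "[PAD]", "[MASK]"]
    )
    tokenizer.train(["data/part1.txt", "data/part2.txt"], trainer)
    tokenizer.save("tokenizer.json")
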
data/lib/tokenizers/version.rb CHANGED
@@ -1,3 +1,3 @@
  module Tokenizers
-   VERSION = "0.2.3"
+   VERSION = "0.3.0"
  end
data/lib/tokenizers.rb CHANGED
@@ -5,7 +5,41 @@ rescue LoadError
    require_relative "tokenizers/tokenizers"
  end

- # modules
+ # decoders
+ require_relative "tokenizers/decoders/bpe_decoder"
+ require_relative "tokenizers/decoders/ctc"
+ require_relative "tokenizers/decoders/metaspace"
+ require_relative "tokenizers/decoders/word_piece"
+
+ # models
+ require_relative "tokenizers/models/bpe"
+ require_relative "tokenizers/models/word_level"
+ require_relative "tokenizers/models/word_piece"
+ require_relative "tokenizers/models/unigram"
+
+ # normalizers
+ require_relative "tokenizers/normalizers/bert_normalizer"
+ require_relative "tokenizers/normalizers/strip"
+
+ # pre-tokenizers
+ require_relative "tokenizers/pre_tokenizers/byte_level"
+ require_relative "tokenizers/pre_tokenizers/digits"
+ require_relative "tokenizers/pre_tokenizers/metaspace"
+ require_relative "tokenizers/pre_tokenizers/punctuation"
+ require_relative "tokenizers/pre_tokenizers/split"
+
+ # processors
+ require_relative "tokenizers/processors/byte_level"
+ require_relative "tokenizers/processors/roberta_processing"
+ require_relative "tokenizers/processors/template_processing"
+
+ # trainers
+ require_relative "tokenizers/trainers/bpe_trainer"
+ require_relative "tokenizers/trainers/unigram_trainer"
+ require_relative "tokenizers/trainers/word_level_trainer"
+ require_relative "tokenizers/trainers/word_piece_trainer"
+
+ # other
  require_relative "tokenizers/char_bpe_tokenizer"
  require_relative "tokenizers/encoding"
  require_relative "tokenizers/from_pretrained"
@@ -15,5 +49,11 @@ require_relative "tokenizers/version"
  module Tokenizers
    class Error < StandardError; end

-   extend FromPretrained
+   def self.from_pretrained(...)
+     Tokenizer.from_pretrained(...)
+   end
+
+   def self.from_file(...)
+     Tokenizer.from_file(...)
+   end
  end
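
The module-level helpers now delegate to Tokenizer, so both spellings load the same object:

    t1 = Tokenizers.from_pretrained("bert-base-cased")
    t2 = Tokenizers::Tokenizer.from_pretrained("bert-base-cased")

    # from_file loads a serialized tokenizer.json (the path is a placeholder)
    t3 = Tokenizers.from_file("tokenizer.json")
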
metadata CHANGED
@@ -1,14 +1,14 @@
  --- !ruby/object:Gem::Specification
  name: tokenizers
  version: !ruby/object:Gem::Version
-   version: 0.2.3
+   version: 0.3.0
  platform: ruby
  authors:
  - Andrew Kane
  autorequire:
  bindir: bin
  cert_chain: []
- date: 2023-01-22 00:00:00.000000000 Z
+ date: 2023-02-08 00:00:00.000000000 Z
  dependencies:
  - !ruby/object:Gem::Dependency
    name: rb_sys
@@ -45,12 +45,39 @@ files:
  - ext/tokenizers/src/models.rs
  - ext/tokenizers/src/normalizers.rs
  - ext/tokenizers/src/pre_tokenizers.rs
+ - ext/tokenizers/src/processors.rs
  - ext/tokenizers/src/tokenizer.rs
+ - ext/tokenizers/src/trainers.rs
+ - ext/tokenizers/src/utils/mod.rs
+ - ext/tokenizers/src/utils/normalization.rs
+ - ext/tokenizers/src/utils/regex.rs
  - lib/tokenizers.rb
  - lib/tokenizers/char_bpe_tokenizer.rb
+ - lib/tokenizers/decoders/bpe_decoder.rb
+ - lib/tokenizers/decoders/ctc.rb
+ - lib/tokenizers/decoders/metaspace.rb
+ - lib/tokenizers/decoders/word_piece.rb
  - lib/tokenizers/encoding.rb
  - lib/tokenizers/from_pretrained.rb
+ - lib/tokenizers/models/bpe.rb
+ - lib/tokenizers/models/unigram.rb
+ - lib/tokenizers/models/word_level.rb
+ - lib/tokenizers/models/word_piece.rb
+ - lib/tokenizers/normalizers/bert_normalizer.rb
+ - lib/tokenizers/normalizers/strip.rb
+ - lib/tokenizers/pre_tokenizers/byte_level.rb
+ - lib/tokenizers/pre_tokenizers/digits.rb
+ - lib/tokenizers/pre_tokenizers/metaspace.rb
+ - lib/tokenizers/pre_tokenizers/punctuation.rb
+ - lib/tokenizers/pre_tokenizers/split.rb
+ - lib/tokenizers/processors/byte_level.rb
+ - lib/tokenizers/processors/roberta_processing.rb
+ - lib/tokenizers/processors/template_processing.rb
  - lib/tokenizers/tokenizer.rb
+ - lib/tokenizers/trainers/bpe_trainer.rb
+ - lib/tokenizers/trainers/unigram_trainer.rb
+ - lib/tokenizers/trainers/word_level_trainer.rb
+ - lib/tokenizers/trainers/word_piece_trainer.rb
  - lib/tokenizers/version.rb
  homepage: https://github.com/ankane/tokenizers-ruby
  licenses:
@@ -71,7 +98,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
  - !ruby/object:Gem::Version
    version: '0'
  requirements: []
- rubygems_version: 3.4.1
+ rubygems_version: 3.4.6
  signing_key:
  specification_version: 4
  summary: Fast state-of-the-art tokenizers for Ruby