tokenizers 0.4.4 → 0.5.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +11 -0
- data/Cargo.lock +189 -219
- data/ext/tokenizers/Cargo.toml +4 -4
- data/ext/tokenizers/src/decoders.rs +31 -10
- data/ext/tokenizers/src/normalizers.rs +2 -2
- data/ext/tokenizers/src/pre_tokenizers.rs +54 -18
- data/ext/tokenizers/src/tokenizer.rs +11 -11
- data/ext/tokenizers/src/trainers.rs +16 -16
- data/lib/tokenizers/decoders/metaspace.rb +2 -2
- data/lib/tokenizers/from_pretrained.rb +2 -2
- data/lib/tokenizers/pre_tokenizers/metaspace.rb +2 -2
- data/lib/tokenizers/version.rb +1 -1
- metadata +4 -4
data/ext/tokenizers/Cargo.toml
CHANGED
@@ -1,21 +1,21 @@
|
|
1
1
|
[package]
|
2
2
|
name = "tokenizers"
|
3
|
-
version = "0.4.4"
|
3
|
+
version = "0.5.1"
|
4
4
|
license = "Apache-2.0"
|
5
5
|
authors = ["Andrew Kane <andrew@ankane.org>"]
|
6
6
|
edition = "2021"
|
7
|
-
rust-version = "1.
|
7
|
+
rust-version = "1.63.0"
|
8
8
|
publish = false
|
9
9
|
|
10
10
|
[lib]
|
11
11
|
crate-type = ["cdylib"]
|
12
12
|
|
13
13
|
[dependencies]
|
14
|
-
magnus = "0.
|
14
|
+
magnus = "0.7"
|
15
15
|
onig = { version = "6", default-features = false }
|
16
16
|
serde = { version = "1", features = ["rc", "derive"] }
|
17
17
|
|
18
18
|
[dependencies.tokenizers]
|
19
|
-
version = "=0.
|
19
|
+
version = "=0.20.0" # also update in from_pretrained.rb
|
20
20
|
default-features = false
|
21
21
|
features = ["progressbar", "onig", "esaxx_fast"]
|
@@ -1,5 +1,6 @@
|
|
1
1
|
use std::sync::{Arc, RwLock};
|
2
2
|
|
3
|
+
use crate::pre_tokenizers::from_string;
|
3
4
|
use magnus::value::Lazy;
|
4
5
|
use magnus::{
|
5
6
|
data_type_builder, function, method, Class, DataType, DataTypeFunctions, Module, Object, RClass, RModule,
|
@@ -11,7 +12,7 @@ use tk::decoders::byte_fallback::ByteFallback;
|
|
11
12
|
use tk::decoders::byte_level::ByteLevel;
|
12
13
|
use tk::decoders::ctc::CTC;
|
13
14
|
use tk::decoders::fuse::Fuse;
|
14
|
-
use tk::decoders::metaspace::Metaspace;
|
15
|
+
use tk::decoders::metaspace::{Metaspace, PrependScheme};
|
15
16
|
use tk::decoders::strip::Strip;
|
16
17
|
use tk::decoders::wordpiece::WordPiece;
|
17
18
|
use tk::decoders::DecoderWrapper;
|
@@ -126,12 +127,29 @@ impl RbDecoder {
|
|
126
127
|
setter!(self, Metaspace, @set_replacement, replacement);
|
127
128
|
}
|
128
129
|
|
129
|
-
pub fn
|
130
|
-
getter!(self, Metaspace,
|
130
|
+
pub fn metaspace_split(&self) -> bool {
|
131
|
+
getter!(self, Metaspace, get_split())
|
131
132
|
}
|
132
133
|
|
133
|
-
pub fn
|
134
|
-
setter!(self, Metaspace,
|
134
|
+
pub fn metaspace_set_split(&self, split: bool) {
|
135
|
+
setter!(self, Metaspace, @set_split, split);
|
136
|
+
}
|
137
|
+
|
138
|
+
pub fn metaspace_prepend_scheme(&self) -> String {
|
139
|
+
// Assuming Metaspace has a method to get the prepend_scheme as a string
|
140
|
+
let scheme: PrependScheme = getter!(self, Metaspace, get_prepend_scheme());
|
141
|
+
match scheme {
|
142
|
+
PrependScheme::First => "first",
|
143
|
+
PrependScheme::Never => "never",
|
144
|
+
PrependScheme::Always => "always",
|
145
|
+
}
|
146
|
+
.to_string()
|
147
|
+
}
|
148
|
+
|
149
|
+
pub fn metaspace_set_prepend_scheme(&self, prepend_scheme: String) -> RbResult<()> {
|
150
|
+
let scheme = from_string(prepend_scheme)?;
|
151
|
+
setter!(self, Metaspace, @set_prepend_scheme, scheme);
|
152
|
+
Ok(())
|
135
153
|
}
|
136
154
|
|
137
155
|
pub fn word_piece_cleanup(&self) -> bool {
|
@@ -194,8 +212,9 @@ impl RbFuse {
|
|
194
212
|
pub struct RbMetaspaceDecoder {}
|
195
213
|
|
196
214
|
impl RbMetaspaceDecoder {
|
197
|
-
pub fn new(replacement: char,
|
198
|
-
|
215
|
+
pub fn new(replacement: char, prepend_scheme: String, split: bool) -> RbResult<RbDecoder> {
|
216
|
+
let prepend_scheme = from_string(prepend_scheme)?;
|
217
|
+
Ok(Metaspace::new(replacement, prepend_scheme, split).into())
|
199
218
|
}
|
200
219
|
}
|
201
220
|
|
@@ -364,11 +383,13 @@ pub fn init_decoders(ruby: &Ruby, module: &RModule) -> RbResult<()> {
|
|
364
383
|
class.define_singleton_method("new", function!(RbFuse::new, 0))?;
|
365
384
|
|
366
385
|
let class = module.define_class("Metaspace", decoder)?;
|
367
|
-
class.define_singleton_method("_new", function!(RbMetaspaceDecoder::new,
|
368
|
-
class.define_method("
|
369
|
-
class.define_method("
|
386
|
+
class.define_singleton_method("_new", function!(RbMetaspaceDecoder::new, 3))?;
|
387
|
+
class.define_method("prepend_scheme", method!(RbDecoder::metaspace_prepend_scheme, 0))?;
|
388
|
+
class.define_method("prepend_scheme=", method!(RbDecoder::metaspace_set_prepend_scheme, 1))?;
|
370
389
|
class.define_method("replacement", method!(RbDecoder::metaspace_replacement, 0))?;
|
371
390
|
class.define_method("replacement=", method!(RbDecoder::metaspace_set_replacement, 1))?;
|
391
|
+
class.define_method("split", method!(RbDecoder::metaspace_split, 0))?;
|
392
|
+
class.define_method("split=", method!(RbDecoder::metaspace_set_split, 1))?;
|
372
393
|
|
373
394
|
let class = module.define_class("Replace", decoder)?;
|
374
395
|
class.define_singleton_method("new", function!(RbReplaceDecoder::new, 2))?;
|
@@ -222,8 +222,8 @@ pub struct RbSequence {}
|
|
222
222
|
impl RbSequence {
|
223
223
|
fn new(normalizers: RArray) -> RbResult<RbNormalizer> {
|
224
224
|
let mut sequence = Vec::with_capacity(normalizers.len());
|
225
|
-
for n in normalizers.
|
226
|
-
let normalizer: &RbNormalizer = TryConvert::try_convert(n
|
225
|
+
for n in normalizers.into_iter() {
|
226
|
+
let normalizer: &RbNormalizer = TryConvert::try_convert(n)?;
|
227
227
|
match &normalizer.normalizer {
|
228
228
|
RbNormalizerTypeWrapper::Sequence(inner) => sequence.extend(inner.iter().cloned()),
|
229
229
|
RbNormalizerTypeWrapper::Single(inner) => sequence.push(inner.clone()),
|
@@ -1,7 +1,7 @@
|
|
1
1
|
use std::sync::{Arc, RwLock};
|
2
2
|
|
3
3
|
use magnus::{
|
4
|
-
data_type_builder, function, method, value::Lazy, Class, DataType, DataTypeFunctions, Module, Object,
|
4
|
+
data_type_builder, exception, function, method, value::Lazy, Class, DataType, DataTypeFunctions, Error, Module, Object,
|
5
5
|
RArray, RClass, RModule, Ruby, TryConvert, TypedData,
|
6
6
|
};
|
7
7
|
|
@@ -12,7 +12,7 @@ use tk::pre_tokenizers::bert::BertPreTokenizer;
|
|
12
12
|
use tk::pre_tokenizers::byte_level::ByteLevel;
|
13
13
|
use tk::pre_tokenizers::delimiter::CharDelimiterSplit;
|
14
14
|
use tk::pre_tokenizers::digits::Digits;
|
15
|
-
use tk::pre_tokenizers::metaspace::Metaspace;
|
15
|
+
use tk::pre_tokenizers::metaspace::{Metaspace, PrependScheme};
|
16
16
|
use tk::pre_tokenizers::punctuation::Punctuation;
|
17
17
|
use tk::pre_tokenizers::split::Split;
|
18
18
|
use tk::pre_tokenizers::unicode_scripts::UnicodeScripts;
|
@@ -118,14 +118,6 @@ impl RbPreTokenizer {
|
|
118
118
|
setter!(self, Digits, individual_digits, individual_digits);
|
119
119
|
}
|
120
120
|
|
121
|
-
fn metaspace_add_prefix_space(&self) -> bool {
|
122
|
-
getter!(self, Metaspace, add_prefix_space)
|
123
|
-
}
|
124
|
-
|
125
|
-
fn metaspace_set_add_prefix_space(&self, add_prefix_space: bool) {
|
126
|
-
setter!(self, Metaspace, add_prefix_space, add_prefix_space);
|
127
|
-
}
|
128
|
-
|
129
121
|
fn metaspace_replacement(&self) -> String {
|
130
122
|
getter!(self, Metaspace, get_replacement().to_string())
|
131
123
|
}
|
@@ -133,6 +125,31 @@ impl RbPreTokenizer {
|
|
133
125
|
fn metaspace_set_replacement(&self, replacement: char) {
|
134
126
|
setter!(self, Metaspace, @set_replacement, replacement);
|
135
127
|
}
|
128
|
+
|
129
|
+
fn metaspace_split(&self) -> bool {
|
130
|
+
getter!(self, Metaspace, get_split())
|
131
|
+
}
|
132
|
+
|
133
|
+
fn metaspace_set_split(&self, split: bool) {
|
134
|
+
setter!(self, Metaspace, @set_split, split);
|
135
|
+
}
|
136
|
+
|
137
|
+
fn metaspace_prepend_scheme(&self) -> String {
|
138
|
+
// Assuming Metaspace has a method to get the prepend_scheme as a string
|
139
|
+
let scheme: PrependScheme = getter!(self, Metaspace, get_prepend_scheme());
|
140
|
+
match scheme {
|
141
|
+
PrependScheme::First => "first",
|
142
|
+
PrependScheme::Never => "never",
|
143
|
+
PrependScheme::Always => "always",
|
144
|
+
}
|
145
|
+
.to_string()
|
146
|
+
}
|
147
|
+
|
148
|
+
fn metaspace_set_prepend_scheme(&self, prepend_scheme: String) -> RbResult<()> {
|
149
|
+
let scheme = from_string(prepend_scheme)?;
|
150
|
+
setter!(self, Metaspace, @set_prepend_scheme, scheme);
|
151
|
+
Ok(())
|
152
|
+
}
|
136
153
|
}
|
137
154
|
|
138
155
|
impl PreTokenizer for RbPreTokenizer {
|
@@ -180,9 +197,11 @@ pub struct RbMetaspace {}
|
|
180
197
|
impl RbMetaspace {
|
181
198
|
fn new(
|
182
199
|
replacement: char,
|
183
|
-
|
184
|
-
|
185
|
-
|
200
|
+
prepend_scheme: String,
|
201
|
+
split: bool,
|
202
|
+
) -> RbResult<RbPreTokenizer> {
|
203
|
+
let prepend_scheme = from_string(prepend_scheme)?;
|
204
|
+
Ok(Metaspace::new(replacement, prepend_scheme, split).into())
|
186
205
|
}
|
187
206
|
}
|
188
207
|
|
@@ -239,8 +258,8 @@ pub struct RbSequence {}
|
|
239
258
|
impl RbSequence {
|
240
259
|
fn new(pre_tokenizers: RArray) -> RbResult<RbPreTokenizer> {
|
241
260
|
let mut sequence = Vec::with_capacity(pre_tokenizers.len());
|
242
|
-
for n in pre_tokenizers.
|
243
|
-
let pretokenizer: &RbPreTokenizer = TryConvert::try_convert(n
|
261
|
+
for n in pre_tokenizers.into_iter() {
|
262
|
+
let pretokenizer: &RbPreTokenizer = TryConvert::try_convert(n)?;
|
244
263
|
match &pretokenizer.pretok {
|
245
264
|
RbPreTokenizerTypeWrapper::Sequence(inner) => {
|
246
265
|
sequence.extend(inner.iter().cloned())
|
@@ -252,6 +271,21 @@ impl RbSequence {
|
|
252
271
|
}
|
253
272
|
}
|
254
273
|
|
274
|
+
pub(crate) fn from_string(string: String) -> RbResult<PrependScheme> {
|
275
|
+
let scheme = match string.as_str() {
|
276
|
+
"first" => PrependScheme::First,
|
277
|
+
"never" => PrependScheme::Never,
|
278
|
+
"always" => PrependScheme::Always,
|
279
|
+
_ => {
|
280
|
+
return Err(Error::new(exception::arg_error(), format!(
|
281
|
+
"{} is an unknown variant, should be one of ['first', 'never', 'always']",
|
282
|
+
string
|
283
|
+
)));
|
284
|
+
}
|
285
|
+
};
|
286
|
+
Ok(scheme)
|
287
|
+
}
|
288
|
+
|
255
289
|
#[derive(Clone, Deserialize)]
|
256
290
|
#[serde(untagged)]
|
257
291
|
pub(crate) enum RbPreTokenizerWrapper {
|
@@ -465,11 +499,13 @@ pub fn init_pre_tokenizers(ruby: &Ruby, module: &RModule) -> RbResult<()> {
|
|
465
499
|
class.define_method("individual_digits=", method!(RbPreTokenizer::digits_set_individual_digits, 1))?;
|
466
500
|
|
467
501
|
let class = module.define_class("Metaspace", pre_tokenizer)?;
|
468
|
-
class.define_singleton_method("_new", function!(RbMetaspace::new,
|
469
|
-
class.define_method("
|
470
|
-
class.define_method("
|
502
|
+
class.define_singleton_method("_new", function!(RbMetaspace::new, 3))?;
|
503
|
+
class.define_method("prepend_scheme", method!(RbPreTokenizer::metaspace_prepend_scheme, 0))?;
|
504
|
+
class.define_method("prepend_scheme=", method!(RbPreTokenizer::metaspace_set_prepend_scheme, 1))?;
|
471
505
|
class.define_method("replacement", method!(RbPreTokenizer::metaspace_replacement, 0))?;
|
472
506
|
class.define_method("replacement=", method!(RbPreTokenizer::metaspace_set_replacement, 1))?;
|
507
|
+
class.define_method("split", method!(RbPreTokenizer::metaspace_split, 0))?;
|
508
|
+
class.define_method("split=", method!(RbPreTokenizer::metaspace_set_split, 1))?;
|
473
509
|
|
474
510
|
let class = module.define_class("Punctuation", pre_tokenizer)?;
|
475
511
|
class.define_singleton_method("_new", function!(RbPunctuation::new, 1))?;
|
@@ -282,12 +282,12 @@ impl RbTokenizer {
|
|
282
282
|
add_special_tokens: bool,
|
283
283
|
) -> RbResult<RArray> {
|
284
284
|
let input: Vec<tk::EncodeInput> = input
|
285
|
-
.
|
285
|
+
.into_iter()
|
286
286
|
.map(|o| {
|
287
287
|
let input: tk::EncodeInput = if is_pretokenized {
|
288
|
-
PreTokenizedEncodeInput::try_convert(o
|
288
|
+
PreTokenizedEncodeInput::try_convert(o)?.into()
|
289
289
|
} else {
|
290
|
-
TextEncodeInput::try_convert(o
|
290
|
+
TextEncodeInput::try_convert(o)?.into()
|
291
291
|
};
|
292
292
|
Ok(input)
|
293
293
|
})
|
@@ -319,26 +319,26 @@ impl RbTokenizer {
|
|
319
319
|
.map_err(RbError::from)
|
320
320
|
}
|
321
321
|
|
322
|
-
pub fn set_decoder(&self, decoder:
|
323
|
-
self.tokenizer.borrow_mut().with_decoder(decoder.
|
322
|
+
pub fn set_decoder(&self, decoder: Option<&RbDecoder>) {
|
323
|
+
self.tokenizer.borrow_mut().with_decoder(decoder.cloned());
|
324
324
|
}
|
325
325
|
|
326
|
-
pub fn set_pre_tokenizer(&self, pretok:
|
326
|
+
pub fn set_pre_tokenizer(&self, pretok: Option<&RbPreTokenizer>) {
|
327
327
|
self.tokenizer
|
328
328
|
.borrow_mut()
|
329
|
-
.with_pre_tokenizer(pretok.
|
329
|
+
.with_pre_tokenizer(pretok.cloned());
|
330
330
|
}
|
331
331
|
|
332
|
-
pub fn set_post_processor(&self, processor:
|
332
|
+
pub fn set_post_processor(&self, processor: Option<&RbPostProcessor>) {
|
333
333
|
self.tokenizer
|
334
334
|
.borrow_mut()
|
335
|
-
.with_post_processor(processor.
|
335
|
+
.with_post_processor(processor.cloned());
|
336
336
|
}
|
337
337
|
|
338
|
-
pub fn set_normalizer(&self, normalizer:
|
338
|
+
pub fn set_normalizer(&self, normalizer: Option<&RbNormalizer>) {
|
339
339
|
self.tokenizer
|
340
340
|
.borrow_mut()
|
341
|
-
.with_normalizer(normalizer.
|
341
|
+
.with_normalizer(normalizer.cloned());
|
342
342
|
}
|
343
343
|
|
344
344
|
pub fn token_to_id(&self, token: String) -> Option<u32> {
|
@@ -110,9 +110,9 @@ impl RbTrainer {
|
|
110
110
|
BpeTrainer,
|
111
111
|
special_tokens,
|
112
112
|
special_tokens
|
113
|
-
.
|
113
|
+
.into_iter()
|
114
114
|
.map(|token| {
|
115
|
-
if let Ok(content) = String::try_convert(token
|
115
|
+
if let Ok(content) = String::try_convert(token) {
|
116
116
|
Ok(RbAddedToken::from(content, Some(true)).get_token())
|
117
117
|
} else {
|
118
118
|
todo!()
|
@@ -197,9 +197,9 @@ impl RbTrainer {
|
|
197
197
|
UnigramTrainer,
|
198
198
|
special_tokens,
|
199
199
|
special_tokens
|
200
|
-
.
|
200
|
+
.into_iter()
|
201
201
|
.map(|token| {
|
202
|
-
if let Ok(content) = String::try_convert(token
|
202
|
+
if let Ok(content) = String::try_convert(token) {
|
203
203
|
Ok(RbAddedToken::from(content, Some(true)).get_token())
|
204
204
|
} else {
|
205
205
|
todo!()
|
@@ -268,9 +268,9 @@ impl RbTrainer {
|
|
268
268
|
WordLevelTrainer,
|
269
269
|
special_tokens,
|
270
270
|
special_tokens
|
271
|
-
.
|
271
|
+
.into_iter()
|
272
272
|
.map(|token| {
|
273
|
-
if let Ok(content) = String::try_convert(token
|
273
|
+
if let Ok(content) = String::try_convert(token) {
|
274
274
|
Ok(RbAddedToken::from(content, Some(true)).get_token())
|
275
275
|
} else {
|
276
276
|
todo!()
|
@@ -322,9 +322,9 @@ impl RbTrainer {
|
|
322
322
|
WordPieceTrainer,
|
323
323
|
@set_special_tokens,
|
324
324
|
special_tokens
|
325
|
-
.
|
325
|
+
.into_iter()
|
326
326
|
.map(|token| {
|
327
|
-
if let Ok(content) = String::try_convert(token
|
327
|
+
if let Ok(content) = String::try_convert(token) {
|
328
328
|
Ok(RbAddedToken::from(content, Some(true)).get_token())
|
329
329
|
} else {
|
330
330
|
todo!()
|
@@ -398,9 +398,9 @@ impl RbBpeTrainer {
|
|
398
398
|
if !value.is_nil() {
|
399
399
|
builder = builder.special_tokens(
|
400
400
|
RArray::try_convert(value)?
|
401
|
-
.
|
401
|
+
.into_iter()
|
402
402
|
.map(|token| {
|
403
|
-
if let Ok(content) = String::try_convert(token
|
403
|
+
if let Ok(content) = String::try_convert(token) {
|
404
404
|
Ok(RbAddedToken::from(content, Some(true)).get_token())
|
405
405
|
} else {
|
406
406
|
todo!()
|
@@ -466,9 +466,9 @@ impl RbUnigramTrainer {
|
|
466
466
|
if !value.is_nil() {
|
467
467
|
builder.special_tokens(
|
468
468
|
RArray::try_convert(value)?
|
469
|
-
.
|
469
|
+
.into_iter()
|
470
470
|
.map(|token| {
|
471
|
-
if let Ok(content) = String::try_convert(token
|
471
|
+
if let Ok(content) = String::try_convert(token) {
|
472
472
|
Ok(RbAddedToken::from(content, Some(true)).get_token())
|
473
473
|
} else {
|
474
474
|
todo!()
|
@@ -540,9 +540,9 @@ impl RbWordLevelTrainer {
|
|
540
540
|
if !value.is_nil() {
|
541
541
|
builder.special_tokens(
|
542
542
|
RArray::try_convert(value)?
|
543
|
-
.
|
543
|
+
.into_iter()
|
544
544
|
.map(|token| {
|
545
|
-
if let Ok(content) = String::try_convert(token
|
545
|
+
if let Ok(content) = String::try_convert(token) {
|
546
546
|
Ok(RbAddedToken::from(content, Some(true)).get_token())
|
547
547
|
} else {
|
548
548
|
todo!()
|
@@ -581,9 +581,9 @@ impl RbWordPieceTrainer {
|
|
581
581
|
if !value.is_nil() {
|
582
582
|
builder = builder.special_tokens(
|
583
583
|
RArray::try_convert(value)?
|
584
|
-
.
|
584
|
+
.into_iter()
|
585
585
|
.map(|token| {
|
586
|
-
if let Ok(content) = String::try_convert(token
|
586
|
+
if let Ok(content) = String::try_convert(token) {
|
587
587
|
Ok(RbAddedToken::from(content, Some(true)).get_token())
|
588
588
|
} else {
|
589
589
|
todo!()
|
@@ -1,8 +1,8 @@
|
|
1
1
|
module Tokenizers
|
2
2
|
module Decoders
|
3
3
|
class Metaspace
|
4
|
-
def self.new(replacement: "\u2581",
|
5
|
-
_new(replacement,
|
4
|
+
def self.new(replacement: "\u2581", prepend_scheme: "always", split: true)
|
5
|
+
_new(replacement, prepend_scheme, split)
|
6
6
|
end
|
7
7
|
end
|
8
8
|
end
|
@@ -1,7 +1,7 @@
|
|
1
1
|
module Tokenizers
|
2
2
|
module FromPretrained
|
3
3
|
# for user agent
|
4
|
-
TOKENIZERS_VERSION = "0.
|
4
|
+
TOKENIZERS_VERSION = "0.20.0"
|
5
5
|
|
6
6
|
# use Ruby for downloads
|
7
7
|
# this avoids the need to vendor OpenSSL on Linux
|
@@ -67,7 +67,7 @@ module Tokenizers
|
|
67
67
|
end
|
68
68
|
end
|
69
69
|
|
70
|
-
options[:content_length_proc] = ->
|
70
|
+
options[:content_length_proc] = ->(_) { puts "Downloading..." }
|
71
71
|
|
72
72
|
# string options are headers
|
73
73
|
tempfile = URI.parse(url).open(headers.merge(options))
|
@@ -1,8 +1,8 @@
|
|
1
1
|
module Tokenizers
|
2
2
|
module PreTokenizers
|
3
3
|
class Metaspace
|
4
|
-
def self.new(replacement: "\u2581",
|
5
|
-
_new(replacement,
|
4
|
+
def self.new(replacement: "\u2581", prepend_scheme: "always", split: true)
|
5
|
+
_new(replacement, prepend_scheme, split)
|
6
6
|
end
|
7
7
|
end
|
8
8
|
end
|
data/lib/tokenizers/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: tokenizers
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.4.4
|
4
|
+
version: 0.5.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Andrew Kane
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2024-
|
11
|
+
date: 2024-08-13 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rb_sys
|
@@ -93,14 +93,14 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
93
93
|
requirements:
|
94
94
|
- - ">="
|
95
95
|
- !ruby/object:Gem::Version
|
96
|
-
version: '3'
|
96
|
+
version: '3.1'
|
97
97
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
98
98
|
requirements:
|
99
99
|
- - ">="
|
100
100
|
- !ruby/object:Gem::Version
|
101
101
|
version: '0'
|
102
102
|
requirements: []
|
103
|
-
rubygems_version: 3.5.
|
103
|
+
rubygems_version: 3.5.11
|
104
104
|
signing_key:
|
105
105
|
specification_version: 4
|
106
106
|
summary: Fast state-of-the-art tokenizers for Ruby
|