tokenizers 0.4.4 → 0.5.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +11 -0
- data/Cargo.lock +189 -219
- data/ext/tokenizers/Cargo.toml +4 -4
- data/ext/tokenizers/src/decoders.rs +31 -10
- data/ext/tokenizers/src/normalizers.rs +2 -2
- data/ext/tokenizers/src/pre_tokenizers.rs +54 -18
- data/ext/tokenizers/src/tokenizer.rs +11 -11
- data/ext/tokenizers/src/trainers.rs +16 -16
- data/lib/tokenizers/decoders/metaspace.rb +2 -2
- data/lib/tokenizers/from_pretrained.rb +2 -2
- data/lib/tokenizers/pre_tokenizers/metaspace.rb +2 -2
- data/lib/tokenizers/version.rb +1 -1
- metadata +4 -4
data/ext/tokenizers/Cargo.toml
CHANGED
@@ -1,21 +1,21 @@
|
|
1
1
|
[package]
|
2
2
|
name = "tokenizers"
|
3
|
-
version = "0.4.4"
|
3
|
+
version = "0.5.1"
|
4
4
|
license = "Apache-2.0"
|
5
5
|
authors = ["Andrew Kane <andrew@ankane.org>"]
|
6
6
|
edition = "2021"
|
7
|
-
rust-version = "1.
|
7
|
+
rust-version = "1.63.0"
|
8
8
|
publish = false
|
9
9
|
|
10
10
|
[lib]
|
11
11
|
crate-type = ["cdylib"]
|
12
12
|
|
13
13
|
[dependencies]
|
14
|
-
magnus = "0.
|
14
|
+
magnus = "0.7"
|
15
15
|
onig = { version = "6", default-features = false }
|
16
16
|
serde = { version = "1", features = ["rc", "derive"] }
|
17
17
|
|
18
18
|
[dependencies.tokenizers]
|
19
|
-
version = "=0.
|
19
|
+
version = "=0.20.0" # also update in from_pretrained.rb
|
20
20
|
default-features = false
|
21
21
|
features = ["progressbar", "onig", "esaxx_fast"]
|
@@ -1,5 +1,6 @@
|
|
1
1
|
use std::sync::{Arc, RwLock};
|
2
2
|
|
3
|
+
use crate::pre_tokenizers::from_string;
|
3
4
|
use magnus::value::Lazy;
|
4
5
|
use magnus::{
|
5
6
|
data_type_builder, function, method, Class, DataType, DataTypeFunctions, Module, Object, RClass, RModule,
|
@@ -11,7 +12,7 @@ use tk::decoders::byte_fallback::ByteFallback;
|
|
11
12
|
use tk::decoders::byte_level::ByteLevel;
|
12
13
|
use tk::decoders::ctc::CTC;
|
13
14
|
use tk::decoders::fuse::Fuse;
|
14
|
-
use tk::decoders::metaspace::Metaspace;
|
15
|
+
use tk::decoders::metaspace::{Metaspace, PrependScheme};
|
15
16
|
use tk::decoders::strip::Strip;
|
16
17
|
use tk::decoders::wordpiece::WordPiece;
|
17
18
|
use tk::decoders::DecoderWrapper;
|
@@ -126,12 +127,29 @@ impl RbDecoder {
|
|
126
127
|
setter!(self, Metaspace, @set_replacement, replacement);
|
127
128
|
}
|
128
129
|
|
129
|
-
pub fn
|
130
|
-
getter!(self, Metaspace,
|
130
|
+
pub fn metaspace_split(&self) -> bool {
|
131
|
+
getter!(self, Metaspace, get_split())
|
131
132
|
}
|
132
133
|
|
133
|
-
pub fn
|
134
|
-
setter!(self, Metaspace,
|
134
|
+
pub fn metaspace_set_split(&self, split: bool) {
|
135
|
+
setter!(self, Metaspace, @set_split, split);
|
136
|
+
}
|
137
|
+
|
138
|
+
pub fn metaspace_prepend_scheme(&self) -> String {
|
139
|
+
// Assuming Metaspace has a method to get the prepend_scheme as a string
|
140
|
+
let scheme: PrependScheme = getter!(self, Metaspace, get_prepend_scheme());
|
141
|
+
match scheme {
|
142
|
+
PrependScheme::First => "first",
|
143
|
+
PrependScheme::Never => "never",
|
144
|
+
PrependScheme::Always => "always",
|
145
|
+
}
|
146
|
+
.to_string()
|
147
|
+
}
|
148
|
+
|
149
|
+
pub fn metaspace_set_prepend_scheme(&self, prepend_scheme: String) -> RbResult<()> {
|
150
|
+
let scheme = from_string(prepend_scheme)?;
|
151
|
+
setter!(self, Metaspace, @set_prepend_scheme, scheme);
|
152
|
+
Ok(())
|
135
153
|
}
|
136
154
|
|
137
155
|
pub fn word_piece_cleanup(&self) -> bool {
|
@@ -194,8 +212,9 @@ impl RbFuse {
|
|
194
212
|
pub struct RbMetaspaceDecoder {}
|
195
213
|
|
196
214
|
impl RbMetaspaceDecoder {
|
197
|
-
pub fn new(replacement: char,
|
198
|
-
|
215
|
+
pub fn new(replacement: char, prepend_scheme: String, split: bool) -> RbResult<RbDecoder> {
|
216
|
+
let prepend_scheme = from_string(prepend_scheme)?;
|
217
|
+
Ok(Metaspace::new(replacement, prepend_scheme, split).into())
|
199
218
|
}
|
200
219
|
}
|
201
220
|
|
@@ -364,11 +383,13 @@ pub fn init_decoders(ruby: &Ruby, module: &RModule) -> RbResult<()> {
|
|
364
383
|
class.define_singleton_method("new", function!(RbFuse::new, 0))?;
|
365
384
|
|
366
385
|
let class = module.define_class("Metaspace", decoder)?;
|
367
|
-
class.define_singleton_method("_new", function!(RbMetaspaceDecoder::new,
|
368
|
-
class.define_method("
|
369
|
-
class.define_method("
|
386
|
+
class.define_singleton_method("_new", function!(RbMetaspaceDecoder::new, 3))?;
|
387
|
+
class.define_method("prepend_scheme", method!(RbDecoder::metaspace_prepend_scheme, 0))?;
|
388
|
+
class.define_method("prepend_scheme=", method!(RbDecoder::metaspace_set_prepend_scheme, 1))?;
|
370
389
|
class.define_method("replacement", method!(RbDecoder::metaspace_replacement, 0))?;
|
371
390
|
class.define_method("replacement=", method!(RbDecoder::metaspace_set_replacement, 1))?;
|
391
|
+
class.define_method("split", method!(RbDecoder::metaspace_split, 0))?;
|
392
|
+
class.define_method("split=", method!(RbDecoder::metaspace_set_split, 1))?;
|
372
393
|
|
373
394
|
let class = module.define_class("Replace", decoder)?;
|
374
395
|
class.define_singleton_method("new", function!(RbReplaceDecoder::new, 2))?;
|
@@ -222,8 +222,8 @@ pub struct RbSequence {}
|
|
222
222
|
impl RbSequence {
|
223
223
|
fn new(normalizers: RArray) -> RbResult<RbNormalizer> {
|
224
224
|
let mut sequence = Vec::with_capacity(normalizers.len());
|
225
|
-
for n in normalizers.
|
226
|
-
let normalizer: &RbNormalizer = TryConvert::try_convert(n
|
225
|
+
for n in normalizers.into_iter() {
|
226
|
+
let normalizer: &RbNormalizer = TryConvert::try_convert(n)?;
|
227
227
|
match &normalizer.normalizer {
|
228
228
|
RbNormalizerTypeWrapper::Sequence(inner) => sequence.extend(inner.iter().cloned()),
|
229
229
|
RbNormalizerTypeWrapper::Single(inner) => sequence.push(inner.clone()),
|
@@ -1,7 +1,7 @@
|
|
1
1
|
use std::sync::{Arc, RwLock};
|
2
2
|
|
3
3
|
use magnus::{
|
4
|
-
data_type_builder, function, method, value::Lazy, Class, DataType, DataTypeFunctions, Module, Object,
|
4
|
+
data_type_builder, exception, function, method, value::Lazy, Class, DataType, DataTypeFunctions, Error, Module, Object,
|
5
5
|
RArray, RClass, RModule, Ruby, TryConvert, TypedData,
|
6
6
|
};
|
7
7
|
|
@@ -12,7 +12,7 @@ use tk::pre_tokenizers::bert::BertPreTokenizer;
|
|
12
12
|
use tk::pre_tokenizers::byte_level::ByteLevel;
|
13
13
|
use tk::pre_tokenizers::delimiter::CharDelimiterSplit;
|
14
14
|
use tk::pre_tokenizers::digits::Digits;
|
15
|
-
use tk::pre_tokenizers::metaspace::Metaspace;
|
15
|
+
use tk::pre_tokenizers::metaspace::{Metaspace, PrependScheme};
|
16
16
|
use tk::pre_tokenizers::punctuation::Punctuation;
|
17
17
|
use tk::pre_tokenizers::split::Split;
|
18
18
|
use tk::pre_tokenizers::unicode_scripts::UnicodeScripts;
|
@@ -118,14 +118,6 @@ impl RbPreTokenizer {
|
|
118
118
|
setter!(self, Digits, individual_digits, individual_digits);
|
119
119
|
}
|
120
120
|
|
121
|
-
fn metaspace_add_prefix_space(&self) -> bool {
|
122
|
-
getter!(self, Metaspace, add_prefix_space)
|
123
|
-
}
|
124
|
-
|
125
|
-
fn metaspace_set_add_prefix_space(&self, add_prefix_space: bool) {
|
126
|
-
setter!(self, Metaspace, add_prefix_space, add_prefix_space);
|
127
|
-
}
|
128
|
-
|
129
121
|
fn metaspace_replacement(&self) -> String {
|
130
122
|
getter!(self, Metaspace, get_replacement().to_string())
|
131
123
|
}
|
@@ -133,6 +125,31 @@ impl RbPreTokenizer {
|
|
133
125
|
fn metaspace_set_replacement(&self, replacement: char) {
|
134
126
|
setter!(self, Metaspace, @set_replacement, replacement);
|
135
127
|
}
|
128
|
+
|
129
|
+
fn metaspace_split(&self) -> bool {
|
130
|
+
getter!(self, Metaspace, get_split())
|
131
|
+
}
|
132
|
+
|
133
|
+
fn metaspace_set_split(&self, split: bool) {
|
134
|
+
setter!(self, Metaspace, @set_split, split);
|
135
|
+
}
|
136
|
+
|
137
|
+
fn metaspace_prepend_scheme(&self) -> String {
|
138
|
+
// Assuming Metaspace has a method to get the prepend_scheme as a string
|
139
|
+
let scheme: PrependScheme = getter!(self, Metaspace, get_prepend_scheme());
|
140
|
+
match scheme {
|
141
|
+
PrependScheme::First => "first",
|
142
|
+
PrependScheme::Never => "never",
|
143
|
+
PrependScheme::Always => "always",
|
144
|
+
}
|
145
|
+
.to_string()
|
146
|
+
}
|
147
|
+
|
148
|
+
fn metaspace_set_prepend_scheme(&self, prepend_scheme: String) -> RbResult<()> {
|
149
|
+
let scheme = from_string(prepend_scheme)?;
|
150
|
+
setter!(self, Metaspace, @set_prepend_scheme, scheme);
|
151
|
+
Ok(())
|
152
|
+
}
|
136
153
|
}
|
137
154
|
|
138
155
|
impl PreTokenizer for RbPreTokenizer {
|
@@ -180,9 +197,11 @@ pub struct RbMetaspace {}
|
|
180
197
|
impl RbMetaspace {
|
181
198
|
fn new(
|
182
199
|
replacement: char,
|
183
|
-
|
184
|
-
|
185
|
-
|
200
|
+
prepend_scheme: String,
|
201
|
+
split: bool,
|
202
|
+
) -> RbResult<RbPreTokenizer> {
|
203
|
+
let prepend_scheme = from_string(prepend_scheme)?;
|
204
|
+
Ok(Metaspace::new(replacement, prepend_scheme, split).into())
|
186
205
|
}
|
187
206
|
}
|
188
207
|
|
@@ -239,8 +258,8 @@ pub struct RbSequence {}
|
|
239
258
|
impl RbSequence {
|
240
259
|
fn new(pre_tokenizers: RArray) -> RbResult<RbPreTokenizer> {
|
241
260
|
let mut sequence = Vec::with_capacity(pre_tokenizers.len());
|
242
|
-
for n in pre_tokenizers.
|
243
|
-
let pretokenizer: &RbPreTokenizer = TryConvert::try_convert(n
|
261
|
+
for n in pre_tokenizers.into_iter() {
|
262
|
+
let pretokenizer: &RbPreTokenizer = TryConvert::try_convert(n)?;
|
244
263
|
match &pretokenizer.pretok {
|
245
264
|
RbPreTokenizerTypeWrapper::Sequence(inner) => {
|
246
265
|
sequence.extend(inner.iter().cloned())
|
@@ -252,6 +271,21 @@ impl RbSequence {
|
|
252
271
|
}
|
253
272
|
}
|
254
273
|
|
274
|
+
pub(crate) fn from_string(string: String) -> RbResult<PrependScheme> {
|
275
|
+
let scheme = match string.as_str() {
|
276
|
+
"first" => PrependScheme::First,
|
277
|
+
"never" => PrependScheme::Never,
|
278
|
+
"always" => PrependScheme::Always,
|
279
|
+
_ => {
|
280
|
+
return Err(Error::new(exception::arg_error(), format!(
|
281
|
+
"{} is an unknown variant, should be one of ['first', 'never', 'always']",
|
282
|
+
string
|
283
|
+
)));
|
284
|
+
}
|
285
|
+
};
|
286
|
+
Ok(scheme)
|
287
|
+
}
|
288
|
+
|
255
289
|
#[derive(Clone, Deserialize)]
|
256
290
|
#[serde(untagged)]
|
257
291
|
pub(crate) enum RbPreTokenizerWrapper {
|
@@ -465,11 +499,13 @@ pub fn init_pre_tokenizers(ruby: &Ruby, module: &RModule) -> RbResult<()> {
|
|
465
499
|
class.define_method("individual_digits=", method!(RbPreTokenizer::digits_set_individual_digits, 1))?;
|
466
500
|
|
467
501
|
let class = module.define_class("Metaspace", pre_tokenizer)?;
|
468
|
-
class.define_singleton_method("_new", function!(RbMetaspace::new,
|
469
|
-
class.define_method("
|
470
|
-
class.define_method("
|
502
|
+
class.define_singleton_method("_new", function!(RbMetaspace::new, 3))?;
|
503
|
+
class.define_method("prepend_scheme", method!(RbPreTokenizer::metaspace_prepend_scheme, 0))?;
|
504
|
+
class.define_method("prepend_scheme=", method!(RbPreTokenizer::metaspace_set_prepend_scheme, 1))?;
|
471
505
|
class.define_method("replacement", method!(RbPreTokenizer::metaspace_replacement, 0))?;
|
472
506
|
class.define_method("replacement=", method!(RbPreTokenizer::metaspace_set_replacement, 1))?;
|
507
|
+
class.define_method("split", method!(RbPreTokenizer::metaspace_split, 0))?;
|
508
|
+
class.define_method("split=", method!(RbPreTokenizer::metaspace_set_split, 1))?;
|
473
509
|
|
474
510
|
let class = module.define_class("Punctuation", pre_tokenizer)?;
|
475
511
|
class.define_singleton_method("_new", function!(RbPunctuation::new, 1))?;
|
@@ -282,12 +282,12 @@ impl RbTokenizer {
|
|
282
282
|
add_special_tokens: bool,
|
283
283
|
) -> RbResult<RArray> {
|
284
284
|
let input: Vec<tk::EncodeInput> = input
|
285
|
-
.
|
285
|
+
.into_iter()
|
286
286
|
.map(|o| {
|
287
287
|
let input: tk::EncodeInput = if is_pretokenized {
|
288
|
-
PreTokenizedEncodeInput::try_convert(o
|
288
|
+
PreTokenizedEncodeInput::try_convert(o)?.into()
|
289
289
|
} else {
|
290
|
-
TextEncodeInput::try_convert(o
|
290
|
+
TextEncodeInput::try_convert(o)?.into()
|
291
291
|
};
|
292
292
|
Ok(input)
|
293
293
|
})
|
@@ -319,26 +319,26 @@ impl RbTokenizer {
|
|
319
319
|
.map_err(RbError::from)
|
320
320
|
}
|
321
321
|
|
322
|
-
pub fn set_decoder(&self, decoder:
|
323
|
-
self.tokenizer.borrow_mut().with_decoder(decoder.
|
322
|
+
pub fn set_decoder(&self, decoder: Option<&RbDecoder>) {
|
323
|
+
self.tokenizer.borrow_mut().with_decoder(decoder.cloned());
|
324
324
|
}
|
325
325
|
|
326
|
-
pub fn set_pre_tokenizer(&self, pretok:
|
326
|
+
pub fn set_pre_tokenizer(&self, pretok: Option<&RbPreTokenizer>) {
|
327
327
|
self.tokenizer
|
328
328
|
.borrow_mut()
|
329
|
-
.with_pre_tokenizer(pretok.
|
329
|
+
.with_pre_tokenizer(pretok.cloned());
|
330
330
|
}
|
331
331
|
|
332
|
-
pub fn set_post_processor(&self, processor:
|
332
|
+
pub fn set_post_processor(&self, processor: Option<&RbPostProcessor>) {
|
333
333
|
self.tokenizer
|
334
334
|
.borrow_mut()
|
335
|
-
.with_post_processor(processor.
|
335
|
+
.with_post_processor(processor.cloned());
|
336
336
|
}
|
337
337
|
|
338
|
-
pub fn set_normalizer(&self, normalizer:
|
338
|
+
pub fn set_normalizer(&self, normalizer: Option<&RbNormalizer>) {
|
339
339
|
self.tokenizer
|
340
340
|
.borrow_mut()
|
341
|
-
.with_normalizer(normalizer.
|
341
|
+
.with_normalizer(normalizer.cloned());
|
342
342
|
}
|
343
343
|
|
344
344
|
pub fn token_to_id(&self, token: String) -> Option<u32> {
|
@@ -110,9 +110,9 @@ impl RbTrainer {
|
|
110
110
|
BpeTrainer,
|
111
111
|
special_tokens,
|
112
112
|
special_tokens
|
113
|
-
.
|
113
|
+
.into_iter()
|
114
114
|
.map(|token| {
|
115
|
-
if let Ok(content) = String::try_convert(token
|
115
|
+
if let Ok(content) = String::try_convert(token) {
|
116
116
|
Ok(RbAddedToken::from(content, Some(true)).get_token())
|
117
117
|
} else {
|
118
118
|
todo!()
|
@@ -197,9 +197,9 @@ impl RbTrainer {
|
|
197
197
|
UnigramTrainer,
|
198
198
|
special_tokens,
|
199
199
|
special_tokens
|
200
|
-
.
|
200
|
+
.into_iter()
|
201
201
|
.map(|token| {
|
202
|
-
if let Ok(content) = String::try_convert(token
|
202
|
+
if let Ok(content) = String::try_convert(token) {
|
203
203
|
Ok(RbAddedToken::from(content, Some(true)).get_token())
|
204
204
|
} else {
|
205
205
|
todo!()
|
@@ -268,9 +268,9 @@ impl RbTrainer {
|
|
268
268
|
WordLevelTrainer,
|
269
269
|
special_tokens,
|
270
270
|
special_tokens
|
271
|
-
.
|
271
|
+
.into_iter()
|
272
272
|
.map(|token| {
|
273
|
-
if let Ok(content) = String::try_convert(token
|
273
|
+
if let Ok(content) = String::try_convert(token) {
|
274
274
|
Ok(RbAddedToken::from(content, Some(true)).get_token())
|
275
275
|
} else {
|
276
276
|
todo!()
|
@@ -322,9 +322,9 @@ impl RbTrainer {
|
|
322
322
|
WordPieceTrainer,
|
323
323
|
@set_special_tokens,
|
324
324
|
special_tokens
|
325
|
-
.
|
325
|
+
.into_iter()
|
326
326
|
.map(|token| {
|
327
|
-
if let Ok(content) = String::try_convert(token
|
327
|
+
if let Ok(content) = String::try_convert(token) {
|
328
328
|
Ok(RbAddedToken::from(content, Some(true)).get_token())
|
329
329
|
} else {
|
330
330
|
todo!()
|
@@ -398,9 +398,9 @@ impl RbBpeTrainer {
|
|
398
398
|
if !value.is_nil() {
|
399
399
|
builder = builder.special_tokens(
|
400
400
|
RArray::try_convert(value)?
|
401
|
-
.
|
401
|
+
.into_iter()
|
402
402
|
.map(|token| {
|
403
|
-
if let Ok(content) = String::try_convert(token
|
403
|
+
if let Ok(content) = String::try_convert(token) {
|
404
404
|
Ok(RbAddedToken::from(content, Some(true)).get_token())
|
405
405
|
} else {
|
406
406
|
todo!()
|
@@ -466,9 +466,9 @@ impl RbUnigramTrainer {
|
|
466
466
|
if !value.is_nil() {
|
467
467
|
builder.special_tokens(
|
468
468
|
RArray::try_convert(value)?
|
469
|
-
.
|
469
|
+
.into_iter()
|
470
470
|
.map(|token| {
|
471
|
-
if let Ok(content) = String::try_convert(token
|
471
|
+
if let Ok(content) = String::try_convert(token) {
|
472
472
|
Ok(RbAddedToken::from(content, Some(true)).get_token())
|
473
473
|
} else {
|
474
474
|
todo!()
|
@@ -540,9 +540,9 @@ impl RbWordLevelTrainer {
|
|
540
540
|
if !value.is_nil() {
|
541
541
|
builder.special_tokens(
|
542
542
|
RArray::try_convert(value)?
|
543
|
-
.
|
543
|
+
.into_iter()
|
544
544
|
.map(|token| {
|
545
|
-
if let Ok(content) = String::try_convert(token
|
545
|
+
if let Ok(content) = String::try_convert(token) {
|
546
546
|
Ok(RbAddedToken::from(content, Some(true)).get_token())
|
547
547
|
} else {
|
548
548
|
todo!()
|
@@ -581,9 +581,9 @@ impl RbWordPieceTrainer {
|
|
581
581
|
if !value.is_nil() {
|
582
582
|
builder = builder.special_tokens(
|
583
583
|
RArray::try_convert(value)?
|
584
|
-
.
|
584
|
+
.into_iter()
|
585
585
|
.map(|token| {
|
586
|
-
if let Ok(content) = String::try_convert(token
|
586
|
+
if let Ok(content) = String::try_convert(token) {
|
587
587
|
Ok(RbAddedToken::from(content, Some(true)).get_token())
|
588
588
|
} else {
|
589
589
|
todo!()
|
@@ -1,8 +1,8 @@
|
|
1
1
|
module Tokenizers
|
2
2
|
module Decoders
|
3
3
|
class Metaspace
|
4
|
-
def self.new(replacement: "\u2581",
|
5
|
-
_new(replacement,
|
4
|
+
def self.new(replacement: "\u2581", prepend_scheme: "always", split: true)
|
5
|
+
_new(replacement, prepend_scheme, split)
|
6
6
|
end
|
7
7
|
end
|
8
8
|
end
|
@@ -1,7 +1,7 @@
|
|
1
1
|
module Tokenizers
|
2
2
|
module FromPretrained
|
3
3
|
# for user agent
|
4
|
-
TOKENIZERS_VERSION = "0.
|
4
|
+
TOKENIZERS_VERSION = "0.20.0"
|
5
5
|
|
6
6
|
# use Ruby for downloads
|
7
7
|
# this avoids the need to vendor OpenSSL on Linux
|
@@ -67,7 +67,7 @@ module Tokenizers
|
|
67
67
|
end
|
68
68
|
end
|
69
69
|
|
70
|
-
options[:content_length_proc] = ->
|
70
|
+
options[:content_length_proc] = ->(_) { puts "Downloading..." }
|
71
71
|
|
72
72
|
# string options are headers
|
73
73
|
tempfile = URI.parse(url).open(headers.merge(options))
|
@@ -1,8 +1,8 @@
|
|
1
1
|
module Tokenizers
|
2
2
|
module PreTokenizers
|
3
3
|
class Metaspace
|
4
|
-
def self.new(replacement: "\u2581",
|
5
|
-
_new(replacement,
|
4
|
+
def self.new(replacement: "\u2581", prepend_scheme: "always", split: true)
|
5
|
+
_new(replacement, prepend_scheme, split)
|
6
6
|
end
|
7
7
|
end
|
8
8
|
end
|
data/lib/tokenizers/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: tokenizers
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.4.4
|
4
|
+
version: 0.5.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Andrew Kane
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2024-
|
11
|
+
date: 2024-08-13 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rb_sys
|
@@ -93,14 +93,14 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
93
93
|
requirements:
|
94
94
|
- - ">="
|
95
95
|
- !ruby/object:Gem::Version
|
96
|
-
version: '3'
|
96
|
+
version: '3.1'
|
97
97
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
98
98
|
requirements:
|
99
99
|
- - ">="
|
100
100
|
- !ruby/object:Gem::Version
|
101
101
|
version: '0'
|
102
102
|
requirements: []
|
103
|
-
rubygems_version: 3.5.
|
103
|
+
rubygems_version: 3.5.11
|
104
104
|
signing_key:
|
105
105
|
specification_version: 4
|
106
106
|
summary: Fast state-of-the-art tokenizers for Ruby
|