tokenizers 0.3.3 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: e6e88ec5618e36e317434410c960603695806bb59dadb2252f2957d8dbf0525b
4
- data.tar.gz: 33a04a4a5faada27e6e7246c16d836a4ff9f6793e89de3cfd4880e30c6c8ed0d
3
+ metadata.gz: 0677a3662d1f3af9007bdc0524a71903270cfbb85c3bf0efaa2365946df6c5b1
4
+ data.tar.gz: 00e49737180957c88fb8f8418de40c8983715c733275df16c1ab2fe97355f92d
5
5
  SHA512:
6
- metadata.gz: 88e4f2ad57fd1d66cd5fcf0d8b7ff6b1ea902258296fb02d207a446032134189e3445a104658074e94f914331c94f46cfdd09eed7c745c0483cb3b32b09e6abf
7
- data.tar.gz: e8a1721ecbd36874322477077331743b0d1ba2de6f90076e07ad5456c230f76625d7f28ed6e6026c11395c6bb27701a6b8c0feedf2050387d32d9b777baa51fe
6
+ metadata.gz: f1fd8f9a57be7cac938ae6bc46533f540b210ef6d46a3f46e168a257d2b3156000b9f451e1085a9c90dd54444abeaa69b5d2ae2905fdb5abfc85fb7d610f1427
7
+ data.tar.gz: c748fb9150431ecce025e9f98f870e8140f186a2cc796eb5df2dfef630db05ab45eb8f23f2dc5e50d39b4eadb5fc1f5e4902482f41b6dd469ff95eae4ec3d2f3
data/CHANGELOG.md CHANGED
@@ -1,3 +1,8 @@
1
+ ## 0.4.0 (2023-09-20)
2
+
3
+ - Updated Tokenizers to 0.14.0
4
+ - Dropped support for Ruby < 3
5
+
1
6
  ## 0.3.3 (2023-04-09)
2
7
 
3
8
  - Updated Tokenizers to 0.13.3
data/Cargo.lock CHANGED
@@ -11,6 +11,15 @@ dependencies = [
11
11
  "memchr",
12
12
  ]
13
13
 
14
+ [[package]]
15
+ name = "aho-corasick"
16
+ version = "1.0.5"
17
+ source = "registry+https://github.com/rust-lang/crates.io-index"
18
+ checksum = "0c378d78423fdad8089616f827526ee33c19f2fddbd5de1629152c9593ba4783"
19
+ dependencies = [
20
+ "memchr",
21
+ ]
22
+
14
23
  [[package]]
15
24
  name = "autocfg"
16
25
  version = "1.1.0"
@@ -25,9 +34,9 @@ checksum = "9e1b586273c5702936fe7b7d6896644d8be71e6314cfe09d3167c95f712589e8"
25
34
 
26
35
  [[package]]
27
36
  name = "bindgen"
28
- version = "0.60.1"
37
+ version = "0.62.0"
29
38
  source = "registry+https://github.com/rust-lang/crates.io-index"
30
- checksum = "062dddbc1ba4aca46de6338e2bf87771414c335f7b2f2036e8f3e9befebf88e6"
39
+ checksum = "c6720a8b7b2d39dd533285ed438d458f65b31b5c257e6ac7bb3d7e82844dd722"
31
40
  dependencies = [
32
41
  "bitflags",
33
42
  "cexpr",
@@ -40,6 +49,7 @@ dependencies = [
40
49
  "regex",
41
50
  "rustc-hash",
42
51
  "shlex",
52
+ "syn 1.0.109",
43
53
  ]
44
54
 
45
55
  [[package]]
@@ -352,31 +362,32 @@ checksum = "58093314a45e00c77d5c508f76e77c3396afbbc0d01506e7fae47b018bac2b1d"
352
362
 
353
363
  [[package]]
354
364
  name = "magnus"
355
- version = "0.5.3"
365
+ version = "0.6.0"
356
366
  source = "registry+https://github.com/rust-lang/crates.io-index"
357
- checksum = "c8dc14463c2552e753ef562961f486ca76f17a857c121db40e9f3ade3f35ab81"
367
+ checksum = "68e9585bfe236e88e6b10b6d8eb5349bd0e0009f3f9dff8d2e99a82601b33743"
358
368
  dependencies = [
359
369
  "magnus-macros",
360
370
  "rb-sys",
361
371
  "rb-sys-env",
372
+ "seq-macro",
362
373
  ]
363
374
 
364
375
  [[package]]
365
376
  name = "magnus-macros"
366
- version = "0.4.1"
377
+ version = "0.6.0"
367
378
  source = "registry+https://github.com/rust-lang/crates.io-index"
368
- checksum = "6cc17af1d45442c011aa579d727ec6cff8a69aea8a6bbad26736e7112d749bfb"
379
+ checksum = "5968c820e2960565f647819f5928a42d6e874551cab9d88d75e3e0660d7f71e3"
369
380
  dependencies = [
370
381
  "proc-macro2",
371
382
  "quote",
372
- "syn 1.0.109",
383
+ "syn 2.0.13",
373
384
  ]
374
385
 
375
386
  [[package]]
376
387
  name = "memchr"
377
- version = "2.5.0"
388
+ version = "2.6.3"
378
389
  source = "registry+https://github.com/rust-lang/crates.io-index"
379
- checksum = "2dffe52ecf27772e601905b7522cb4ef790d2cc203488bbd0e2fe85fcb74566d"
390
+ checksum = "8f232d6ef707e1956a43342693d2a31e72989554d58299d7a88738cc95b0d35c"
380
391
 
381
392
  [[package]]
382
393
  name = "memoffset"
@@ -575,18 +586,18 @@ dependencies = [
575
586
 
576
587
  [[package]]
577
588
  name = "rb-sys"
578
- version = "0.9.71"
589
+ version = "0.9.79"
579
590
  source = "registry+https://github.com/rust-lang/crates.io-index"
580
- checksum = "156bfedced1e236600bcaad538477097ff2ed5c6b474e411d15b791e1d24c0f1"
591
+ checksum = "939fb78db3e4f26665c1d4c7b91ca66d3578335a19aba552d4a6445811d07072"
581
592
  dependencies = [
582
593
  "rb-sys-build",
583
594
  ]
584
595
 
585
596
  [[package]]
586
597
  name = "rb-sys-build"
587
- version = "0.9.71"
598
+ version = "0.9.79"
588
599
  source = "registry+https://github.com/rust-lang/crates.io-index"
589
- checksum = "5cb2e4a32cbc290b543a74567072ad24b708aff7bb5dde5a68d5690379cd7938"
600
+ checksum = "335a95eb0420d52fa94ef12019df3c2c250c6b19cbb3c60bd05cb7e9c362072c"
590
601
  dependencies = [
591
602
  "bindgen",
592
603
  "lazy_static",
@@ -605,20 +616,32 @@ checksum = "a35802679f07360454b418a5d1735c89716bde01d35b1560fc953c1415a0b3bb"
605
616
 
606
617
  [[package]]
607
618
  name = "regex"
608
- version = "1.7.3"
619
+ version = "1.9.5"
609
620
  source = "registry+https://github.com/rust-lang/crates.io-index"
610
- checksum = "8b1f693b24f6ac912f4893ef08244d70b6067480d2f1a46e950c9691e6749d1d"
621
+ checksum = "697061221ea1b4a94a624f67d0ae2bfe4e22b8a17b6a192afb11046542cc8c47"
611
622
  dependencies = [
612
- "aho-corasick",
623
+ "aho-corasick 1.0.5",
624
+ "memchr",
625
+ "regex-automata",
626
+ "regex-syntax",
627
+ ]
628
+
629
+ [[package]]
630
+ name = "regex-automata"
631
+ version = "0.3.8"
632
+ source = "registry+https://github.com/rust-lang/crates.io-index"
633
+ checksum = "c2f401f4955220693b56f8ec66ee9c78abffd8d1c4f23dc41a23839eb88f0795"
634
+ dependencies = [
635
+ "aho-corasick 1.0.5",
613
636
  "memchr",
614
637
  "regex-syntax",
615
638
  ]
616
639
 
617
640
  [[package]]
618
641
  name = "regex-syntax"
619
- version = "0.6.29"
642
+ version = "0.7.5"
620
643
  source = "registry+https://github.com/rust-lang/crates.io-index"
621
- checksum = "f162c6dd7b008981e4d40210aca20b4bd0f9b60ca9271061b07f78537722f2e1"
644
+ checksum = "dbb5fb1acd8a1a18b3dd5be62d25485eb770e05afb408a9627d14d451bae12da"
622
645
 
623
646
  [[package]]
624
647
  name = "rustc-hash"
@@ -638,6 +661,12 @@ version = "1.1.0"
638
661
  source = "registry+https://github.com/rust-lang/crates.io-index"
639
662
  checksum = "d29ab0c6d3fc0ee92fe66e2d99f700eab17a8d57d1c1d3b748380fb20baa78cd"
640
663
 
664
+ [[package]]
665
+ name = "seq-macro"
666
+ version = "0.3.5"
667
+ source = "registry+https://github.com/rust-lang/crates.io-index"
668
+ checksum = "a3f0bf26fd526d2a95683cd0f87bf103b8539e2ca1ef48ce002d67aad59aa0b4"
669
+
641
670
  [[package]]
642
671
  name = "serde"
643
672
  version = "1.0.159"
@@ -749,21 +778,21 @@ dependencies = [
749
778
 
750
779
  [[package]]
751
780
  name = "tokenizers"
752
- version = "0.3.3"
781
+ version = "0.4.0"
753
782
  dependencies = [
754
783
  "magnus",
755
784
  "onig",
756
785
  "serde",
757
- "tokenizers 0.13.3",
786
+ "tokenizers 0.14.0",
758
787
  ]
759
788
 
760
789
  [[package]]
761
790
  name = "tokenizers"
762
- version = "0.13.3"
791
+ version = "0.14.0"
763
792
  source = "registry+https://github.com/rust-lang/crates.io-index"
764
- checksum = "5cf49017523bf0bc01c9966f172c5f120bbb7b96cccd1708772dd42e767fb9f5"
793
+ checksum = "12b515a66453a4d68f03398054f7204fd0dde6b93d3f20ea90b08025ab49b499"
765
794
  dependencies = [
766
- "aho-corasick",
795
+ "aho-corasick 0.7.20",
767
796
  "derive_builder",
768
797
  "esaxx-rs",
769
798
  "getrandom",
@@ -1,20 +1,21 @@
1
1
  [package]
2
2
  name = "tokenizers"
3
- version = "0.3.3"
3
+ version = "0.4.0"
4
4
  license = "Apache-2.0"
5
5
  authors = ["Andrew Kane <andrew@ankane.org>"]
6
6
  edition = "2021"
7
+ rust-version = "1.62.0"
7
8
  publish = false
8
9
 
9
10
  [lib]
10
11
  crate-type = ["cdylib"]
11
12
 
12
13
  [dependencies]
13
- magnus = "0.5"
14
+ magnus = "0.6"
14
15
  onig = { version = "6", default-features = false }
15
16
  serde = { version = "1", features = ["rc", "derive"] }
16
17
 
17
18
  [dependencies.tokenizers]
18
- version = "=0.13.3" # also update in from_pretrained.rb
19
+ version = "=0.14.0" # also update in from_pretrained.rb
19
20
  default-features = false
20
21
  features = ["progressbar", "onig", "esaxx_fast"]
@@ -1,9 +1,9 @@
1
1
  use std::sync::{Arc, RwLock};
2
2
 
3
- use magnus::typed_data::DataTypeBuilder;
3
+ use magnus::value::Lazy;
4
4
  use magnus::{
5
- function, memoize, method, Class, DataType, DataTypeFunctions, Module, Object, RClass, RModule,
6
- TypedData,
5
+ data_type_builder, function, method, Class, DataType, DataTypeFunctions, Module, Object, RClass, RModule,
6
+ Ruby, TypedData,
7
7
  };
8
8
  use serde::{Deserialize, Serialize};
9
9
  use tk::decoders::bpe::BPEDecoder;
@@ -19,7 +19,7 @@ use tk::Decoder;
19
19
  use tk::normalizers::replace::Replace;
20
20
 
21
21
  use super::utils::*;
22
- use super::{RbError, RbResult};
22
+ use super::{DECODERS, RbError, RbResult};
23
23
 
24
24
  #[derive(DataTypeFunctions, Clone, Deserialize, Serialize)]
25
25
  pub struct RbDecoder {
@@ -260,74 +260,85 @@ impl Decoder for RbDecoderWrapper {
260
260
  }
261
261
 
262
262
  unsafe impl TypedData for RbDecoder {
263
- fn class() -> RClass {
264
- *memoize!(RClass: {
265
- let class: RClass = crate::decoders().const_get("Decoder").unwrap();
266
- class.undef_alloc_func();
267
- class
268
- })
263
+ fn class(ruby: &Ruby) -> RClass {
264
+ static CLASS: Lazy<RClass> = Lazy::new(|ruby| {
265
+ let class: RClass = ruby.get_inner(&DECODERS).const_get("Decoder").unwrap();
266
+ class.undef_default_alloc_func();
267
+ class
268
+ });
269
+ ruby.get_inner(&CLASS)
269
270
  }
270
271
 
271
272
  fn data_type() -> &'static DataType {
272
- memoize!(DataType: DataTypeBuilder::<RbDecoder>::new("Tokenizers::Decoders::Decoder").build())
273
- }
274
-
275
- fn class_for(value: &Self) -> RClass {
273
+ static DATA_TYPE: DataType = data_type_builder!(RbDecoder, "Tokenizers::Decoders::Decoder").build();
274
+ &DATA_TYPE
275
+ }
276
+
277
+ fn class_for(ruby: &Ruby, value: &Self) -> RClass {
278
+ static BPE_DECODER: Lazy<RClass> = Lazy::new(|ruby| {
279
+ let class: RClass = ruby.get_inner(&DECODERS).const_get("BPEDecoder").unwrap();
280
+ class.undef_default_alloc_func();
281
+ class
282
+ });
283
+ static BYTE_FALLBACK: Lazy<RClass> = Lazy::new(|ruby| {
284
+ let class: RClass = ruby.get_inner(&DECODERS).const_get("ByteFallback").unwrap();
285
+ class.undef_default_alloc_func();
286
+ class
287
+ });
288
+ static BYTE_LEVEL: Lazy<RClass> = Lazy::new(|ruby| {
289
+ let class: RClass = ruby.get_inner(&DECODERS).const_get("ByteLevel").unwrap();
290
+ class.undef_default_alloc_func();
291
+ class
292
+ });
293
+ static CTC: Lazy<RClass> = Lazy::new(|ruby| {
294
+ let class: RClass = ruby.get_inner(&DECODERS).const_get("CTC").unwrap();
295
+ class.undef_default_alloc_func();
296
+ class
297
+ });
298
+ static FUSE: Lazy<RClass> = Lazy::new(|ruby| {
299
+ let class: RClass = ruby.get_inner(&DECODERS).const_get("Fuse").unwrap();
300
+ class.undef_default_alloc_func();
301
+ class
302
+ });
303
+ static METASPACE: Lazy<RClass> = Lazy::new(|ruby| {
304
+ let class: RClass = ruby.get_inner(&DECODERS).const_get("Metaspace").unwrap();
305
+ class.undef_default_alloc_func();
306
+ class
307
+ });
308
+ static REPLACE: Lazy<RClass> = Lazy::new(|ruby| {
309
+ let class: RClass = ruby.get_inner(&DECODERS).const_get("Replace").unwrap();
310
+ class.undef_default_alloc_func();
311
+ class
312
+ });
313
+ static STRIP: Lazy<RClass> = Lazy::new(|ruby| {
314
+ let class: RClass = ruby.get_inner(&DECODERS).const_get("Strip").unwrap();
315
+ class.undef_default_alloc_func();
316
+ class
317
+ });
318
+ static WORD_PIECE: Lazy<RClass> = Lazy::new(|ruby| {
319
+ let class: RClass = ruby.get_inner(&DECODERS).const_get("WordPiece").unwrap();
320
+ class.undef_default_alloc_func();
321
+ class
322
+ });
276
323
  match &value.decoder {
277
324
  RbDecoderWrapper::Wrapped(inner) => match *inner.read().unwrap() {
278
- DecoderWrapper::BPE(_) => *memoize!(RClass: {
279
- let class: RClass = crate::decoders().const_get("BPEDecoder").unwrap();
280
- class.undef_alloc_func();
281
- class
282
- }),
283
- DecoderWrapper::ByteFallback(_) => *memoize!(RClass: {
284
- let class: RClass = crate::decoders().const_get("ByteFallback").unwrap();
285
- class.undef_alloc_func();
286
- class
287
- }),
288
- DecoderWrapper::ByteLevel(_) => *memoize!(RClass: {
289
- let class: RClass = crate::decoders().const_get("ByteLevel").unwrap();
290
- class.undef_alloc_func();
291
- class
292
- }),
293
- DecoderWrapper::CTC(_) => *memoize!(RClass: {
294
- let class: RClass = crate::decoders().const_get("CTC").unwrap();
295
- class.undef_alloc_func();
296
- class
297
- }),
298
- DecoderWrapper::Fuse(_) => *memoize!(RClass: {
299
- let class: RClass = crate::decoders().const_get("Fuse").unwrap();
300
- class.undef_alloc_func();
301
- class
302
- }),
303
- DecoderWrapper::Metaspace(_) => *memoize!(RClass: {
304
- let class: RClass = crate::decoders().const_get("Metaspace").unwrap();
305
- class.undef_alloc_func();
306
- class
307
- }),
308
- DecoderWrapper::Replace(_) => *memoize!(RClass: {
309
- let class: RClass = crate::decoders().const_get("Replace").unwrap();
310
- class.undef_alloc_func();
311
- class
312
- }),
313
- DecoderWrapper::Strip(_) => *memoize!(RClass: {
314
- let class: RClass = crate::decoders().const_get("Strip").unwrap();
315
- class.undef_alloc_func();
316
- class
317
- }),
318
- DecoderWrapper::WordPiece(_) => *memoize!(RClass: {
319
- let class: RClass = crate::decoders().const_get("WordPiece").unwrap();
320
- class.undef_alloc_func();
321
- class
322
- }),
325
+ DecoderWrapper::BPE(_) => ruby.get_inner(&BPE_DECODER),
326
+ DecoderWrapper::ByteFallback(_) => ruby.get_inner(&BYTE_FALLBACK),
327
+ DecoderWrapper::ByteLevel(_) => ruby.get_inner(&BYTE_LEVEL),
328
+ DecoderWrapper::CTC(_) => ruby.get_inner(&CTC),
329
+ DecoderWrapper::Fuse(_) => ruby.get_inner(&FUSE),
330
+ DecoderWrapper::Metaspace(_) => ruby.get_inner(&METASPACE),
331
+ DecoderWrapper::Replace(_) => ruby.get_inner(&REPLACE),
332
+ DecoderWrapper::Strip(_) => ruby.get_inner(&STRIP),
333
+ DecoderWrapper::WordPiece(_) => ruby.get_inner(&WORD_PIECE),
323
334
  _ => todo!(),
324
335
  },
325
336
  }
326
337
  }
327
338
  }
328
339
 
329
- pub fn decoders(module: &RModule) -> RbResult<()> {
330
- let decoder = module.define_class("Decoder", Default::default())?;
340
+ pub fn init_decoders(ruby: &Ruby, module: &RModule) -> RbResult<()> {
341
+ let decoder = module.define_class("Decoder", ruby.class_object())?;
331
342
 
332
343
  let class = module.define_class("BPEDecoder", decoder)?;
333
344
  class.define_singleton_method("_new", function!(RbBPEDecoder::new, 1))?;
@@ -1,6 +1,6 @@
1
- use magnus::{memoize, Error, ExceptionClass, Module};
1
+ use magnus::{prelude::*, value::Lazy, Error, ExceptionClass, Ruby};
2
2
 
3
- use super::module;
3
+ use super::TOKENIZERS;
4
4
 
5
5
  pub struct RbError {}
6
6
 
@@ -11,6 +11,8 @@ impl RbError {
11
11
  }
12
12
  }
13
13
 
14
+ static ERROR: Lazy<ExceptionClass> = Lazy::new(|ruby| ruby.get_inner(&TOKENIZERS).const_get("Error").unwrap());
15
+
14
16
  fn error() -> ExceptionClass {
15
- *memoize!(ExceptionClass: module().const_get("Error").unwrap())
17
+ Ruby::get().unwrap().get_inner(&ERROR)
16
18
  }
@@ -1,3 +1,5 @@
1
+ #![allow(clippy::new_ret_no_self)]
2
+
1
3
  extern crate tokenizers as tk;
2
4
 
3
5
  mod decoders;
@@ -16,43 +18,29 @@ use error::RbError;
16
18
  use tokenizer::RbTokenizer;
17
19
  use utils::RbRegex;
18
20
 
19
- use magnus::{define_module, function, memoize, method, prelude::*, Error, RModule};
21
+ use magnus::{function, method, prelude::*, value::Lazy, Error, RModule, Ruby};
20
22
 
21
23
  type RbResult<T> = Result<T, Error>;
22
24
 
23
- fn module() -> RModule {
24
- *memoize!(RModule: define_module("Tokenizers").unwrap())
25
- }
25
+ static TOKENIZERS: Lazy<RModule> = Lazy::new(|ruby| ruby.class_object().const_get("Tokenizers").unwrap());
26
26
 
27
- fn decoders() -> RModule {
28
- *memoize!(RModule: module().const_get("Decoders").unwrap())
29
- }
27
+ static DECODERS: Lazy<RModule> = Lazy::new(|ruby| ruby.get_inner(&TOKENIZERS).const_get("Decoders").unwrap());
30
28
 
31
- fn models() -> RModule {
32
- *memoize!(RModule: module().const_get("Models").unwrap())
33
- }
29
+ static MODELS: Lazy<RModule> = Lazy::new(|ruby| ruby.get_inner(&TOKENIZERS).const_get("Models").unwrap());
34
30
 
35
- fn normalizers() -> RModule {
36
- *memoize!(RModule: module().const_get("Normalizers").unwrap())
37
- }
31
+ static NORMALIZERS: Lazy<RModule> = Lazy::new(|ruby| ruby.get_inner(&TOKENIZERS).const_get("Normalizers").unwrap());
38
32
 
39
- fn pre_tokenizers() -> RModule {
40
- *memoize!(RModule: module().const_get("PreTokenizers").unwrap())
41
- }
33
+ static PRE_TOKENIZERS: Lazy<RModule> = Lazy::new(|ruby| ruby.get_inner(&TOKENIZERS).const_get("PreTokenizers").unwrap());
42
34
 
43
- fn processors() -> RModule {
44
- *memoize!(RModule: module().const_get("Processors").unwrap())
45
- }
35
+ static PROCESSORS: Lazy<RModule> = Lazy::new(|ruby| ruby.get_inner(&TOKENIZERS).const_get("Processors").unwrap());
46
36
 
47
- fn trainers() -> RModule {
48
- *memoize!(RModule: module().const_get("Trainers").unwrap())
49
- }
37
+ static TRAINERS: Lazy<RModule> = Lazy::new(|ruby| ruby.get_inner(&TOKENIZERS).const_get("Trainers").unwrap());
50
38
 
51
39
  #[magnus::init]
52
- fn init() -> RbResult<()> {
53
- let module = module();
40
+ fn init(ruby: &Ruby) -> RbResult<()> {
41
+ let module = ruby.get_inner(&TOKENIZERS);
54
42
 
55
- let class = module.define_class("Tokenizer", Default::default())?;
43
+ let class = module.define_class("Tokenizer", ruby.class_object())?;
56
44
  class.define_singleton_method("new", function!(RbTokenizer::from_model, 1))?;
57
45
  class.define_singleton_method("from_file", function!(RbTokenizer::from_file, 1))?;
58
46
  class.define_method(
@@ -86,7 +74,7 @@ fn init() -> RbResult<()> {
86
74
  class.define_method("_vocab_size", method!(RbTokenizer::vocab_size, 1))?;
87
75
  class.define_method("_to_s", method!(RbTokenizer::to_str, 1))?;
88
76
 
89
- let class = module.define_class("Encoding", Default::default())?;
77
+ let class = module.define_class("Encoding", ruby.class_object())?;
90
78
  class.define_method("n_sequences", method!(RbEncoding::n_sequences, 0))?;
91
79
  class.define_method("ids", method!(RbEncoding::ids, 0))?;
92
80
  class.define_method("tokens", method!(RbEncoding::tokens, 0))?;
@@ -111,7 +99,7 @@ fn init() -> RbResult<()> {
111
99
  class.define_method("_char_to_token", method!(RbEncoding::char_to_token, 2))?;
112
100
  class.define_method("_char_to_word", method!(RbEncoding::char_to_word, 2))?;
113
101
 
114
- let class = module.define_class("Regex", Default::default())?;
102
+ let class = module.define_class("Regex", ruby.class_object())?;
115
103
  class.define_singleton_method("new", function!(RbRegex::new, 1))?;
116
104
 
117
105
  let models = module.define_module("Models")?;
@@ -121,12 +109,12 @@ fn init() -> RbResult<()> {
121
109
  let normalizers = module.define_module("Normalizers")?;
122
110
  let trainers = module.define_module("Trainers")?;
123
111
 
124
- models::models(&models)?;
125
- pre_tokenizers::pre_tokenizers(&pre_tokenizers)?;
126
- decoders::decoders(&decoders)?;
127
- processors::processors(&processors)?;
128
- normalizers::normalizers(&normalizers)?;
129
- trainers::trainers(&trainers)?;
112
+ models::init_models(ruby, &models)?;
113
+ pre_tokenizers::init_pre_tokenizers(ruby, &pre_tokenizers)?;
114
+ decoders::init_decoders(ruby, &decoders)?;
115
+ processors::init_processors(ruby, &processors)?;
116
+ normalizers::init_normalizers(ruby, &normalizers)?;
117
+ trainers::init_trainers(ruby, &trainers)?;
130
118
 
131
119
  Ok(())
132
120
  }