tokenizers 0.3.3 → 0.4.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: e6e88ec5618e36e317434410c960603695806bb59dadb2252f2957d8dbf0525b
4
- data.tar.gz: 33a04a4a5faada27e6e7246c16d836a4ff9f6793e89de3cfd4880e30c6c8ed0d
3
+ metadata.gz: ae078880dfee0d026206156174a482b7e5345aea4784bb4a3e1298c499dd0e3d
4
+ data.tar.gz: baedf2cd55c0b4332232924bc2439e8ab9f6ba6703794e376f7f34f5724717c2
5
5
  SHA512:
6
- metadata.gz: 88e4f2ad57fd1d66cd5fcf0d8b7ff6b1ea902258296fb02d207a446032134189e3445a104658074e94f914331c94f46cfdd09eed7c745c0483cb3b32b09e6abf
7
- data.tar.gz: e8a1721ecbd36874322477077331743b0d1ba2de6f90076e07ad5456c230f76625d7f28ed6e6026c11395c6bb27701a6b8c0feedf2050387d32d9b777baa51fe
6
+ metadata.gz: 6292155935e06d70b9ab862d2493154ec21f3cc1ec9a7188e00517f026a3d79460f84b08c9701b6eab2b758ab27ce2a5a4fb90c517ec7a1817f5de31a0b95324
7
+ data.tar.gz: 99b04f81650ae8b12be1e82dc8989a37d9d90542cb461c7fadf0e618f8ac4592b614fa357a462d2e71cb8833e058678ff6e5e5d421b825c969559f5569c89cd5
data/CHANGELOG.md CHANGED
@@ -1,3 +1,12 @@
1
+ ## 0.4.1 (2023-10-05)
2
+
3
+ - Fixed error loading gem
4
+
5
+ ## 0.4.0 (2023-09-20)
6
+
7
+ - Updated Tokenizers to 0.14.0
8
+ - Dropped support for Ruby < 3
9
+
1
10
  ## 0.3.3 (2023-04-09)
2
11
 
3
12
  - Updated Tokenizers to 0.13.3
data/Cargo.lock CHANGED
@@ -11,6 +11,15 @@ dependencies = [
11
11
  "memchr",
12
12
  ]
13
13
 
14
+ [[package]]
15
+ name = "aho-corasick"
16
+ version = "1.0.5"
17
+ source = "registry+https://github.com/rust-lang/crates.io-index"
18
+ checksum = "0c378d78423fdad8089616f827526ee33c19f2fddbd5de1629152c9593ba4783"
19
+ dependencies = [
20
+ "memchr",
21
+ ]
22
+
14
23
  [[package]]
15
24
  name = "autocfg"
16
25
  version = "1.1.0"
@@ -25,9 +34,9 @@ checksum = "9e1b586273c5702936fe7b7d6896644d8be71e6314cfe09d3167c95f712589e8"
25
34
 
26
35
  [[package]]
27
36
  name = "bindgen"
28
- version = "0.60.1"
37
+ version = "0.62.0"
29
38
  source = "registry+https://github.com/rust-lang/crates.io-index"
30
- checksum = "062dddbc1ba4aca46de6338e2bf87771414c335f7b2f2036e8f3e9befebf88e6"
39
+ checksum = "c6720a8b7b2d39dd533285ed438d458f65b31b5c257e6ac7bb3d7e82844dd722"
31
40
  dependencies = [
32
41
  "bitflags",
33
42
  "cexpr",
@@ -40,6 +49,7 @@ dependencies = [
40
49
  "regex",
41
50
  "rustc-hash",
42
51
  "shlex",
52
+ "syn 1.0.109",
43
53
  ]
44
54
 
45
55
  [[package]]
@@ -352,31 +362,32 @@ checksum = "58093314a45e00c77d5c508f76e77c3396afbbc0d01506e7fae47b018bac2b1d"
352
362
 
353
363
  [[package]]
354
364
  name = "magnus"
355
- version = "0.5.3"
365
+ version = "0.6.0"
356
366
  source = "registry+https://github.com/rust-lang/crates.io-index"
357
- checksum = "c8dc14463c2552e753ef562961f486ca76f17a857c121db40e9f3ade3f35ab81"
367
+ checksum = "68e9585bfe236e88e6b10b6d8eb5349bd0e0009f3f9dff8d2e99a82601b33743"
358
368
  dependencies = [
359
369
  "magnus-macros",
360
370
  "rb-sys",
361
371
  "rb-sys-env",
372
+ "seq-macro",
362
373
  ]
363
374
 
364
375
  [[package]]
365
376
  name = "magnus-macros"
366
- version = "0.4.1"
377
+ version = "0.6.0"
367
378
  source = "registry+https://github.com/rust-lang/crates.io-index"
368
- checksum = "6cc17af1d45442c011aa579d727ec6cff8a69aea8a6bbad26736e7112d749bfb"
379
+ checksum = "5968c820e2960565f647819f5928a42d6e874551cab9d88d75e3e0660d7f71e3"
369
380
  dependencies = [
370
381
  "proc-macro2",
371
382
  "quote",
372
- "syn 1.0.109",
383
+ "syn 2.0.13",
373
384
  ]
374
385
 
375
386
  [[package]]
376
387
  name = "memchr"
377
- version = "2.5.0"
388
+ version = "2.6.3"
378
389
  source = "registry+https://github.com/rust-lang/crates.io-index"
379
- checksum = "2dffe52ecf27772e601905b7522cb4ef790d2cc203488bbd0e2fe85fcb74566d"
390
+ checksum = "8f232d6ef707e1956a43342693d2a31e72989554d58299d7a88738cc95b0d35c"
380
391
 
381
392
  [[package]]
382
393
  name = "memoffset"
@@ -575,18 +586,18 @@ dependencies = [
575
586
 
576
587
  [[package]]
577
588
  name = "rb-sys"
578
- version = "0.9.71"
589
+ version = "0.9.79"
579
590
  source = "registry+https://github.com/rust-lang/crates.io-index"
580
- checksum = "156bfedced1e236600bcaad538477097ff2ed5c6b474e411d15b791e1d24c0f1"
591
+ checksum = "939fb78db3e4f26665c1d4c7b91ca66d3578335a19aba552d4a6445811d07072"
581
592
  dependencies = [
582
593
  "rb-sys-build",
583
594
  ]
584
595
 
585
596
  [[package]]
586
597
  name = "rb-sys-build"
587
- version = "0.9.71"
598
+ version = "0.9.79"
588
599
  source = "registry+https://github.com/rust-lang/crates.io-index"
589
- checksum = "5cb2e4a32cbc290b543a74567072ad24b708aff7bb5dde5a68d5690379cd7938"
600
+ checksum = "335a95eb0420d52fa94ef12019df3c2c250c6b19cbb3c60bd05cb7e9c362072c"
590
601
  dependencies = [
591
602
  "bindgen",
592
603
  "lazy_static",
@@ -605,20 +616,32 @@ checksum = "a35802679f07360454b418a5d1735c89716bde01d35b1560fc953c1415a0b3bb"
605
616
 
606
617
  [[package]]
607
618
  name = "regex"
608
- version = "1.7.3"
619
+ version = "1.9.5"
620
+ source = "registry+https://github.com/rust-lang/crates.io-index"
621
+ checksum = "697061221ea1b4a94a624f67d0ae2bfe4e22b8a17b6a192afb11046542cc8c47"
622
+ dependencies = [
623
+ "aho-corasick 1.0.5",
624
+ "memchr",
625
+ "regex-automata",
626
+ "regex-syntax",
627
+ ]
628
+
629
+ [[package]]
630
+ name = "regex-automata"
631
+ version = "0.3.8"
609
632
  source = "registry+https://github.com/rust-lang/crates.io-index"
610
- checksum = "8b1f693b24f6ac912f4893ef08244d70b6067480d2f1a46e950c9691e6749d1d"
633
+ checksum = "c2f401f4955220693b56f8ec66ee9c78abffd8d1c4f23dc41a23839eb88f0795"
611
634
  dependencies = [
612
- "aho-corasick",
635
+ "aho-corasick 1.0.5",
613
636
  "memchr",
614
637
  "regex-syntax",
615
638
  ]
616
639
 
617
640
  [[package]]
618
641
  name = "regex-syntax"
619
- version = "0.6.29"
642
+ version = "0.7.5"
620
643
  source = "registry+https://github.com/rust-lang/crates.io-index"
621
- checksum = "f162c6dd7b008981e4d40210aca20b4bd0f9b60ca9271061b07f78537722f2e1"
644
+ checksum = "dbb5fb1acd8a1a18b3dd5be62d25485eb770e05afb408a9627d14d451bae12da"
622
645
 
623
646
  [[package]]
624
647
  name = "rustc-hash"
@@ -638,6 +661,12 @@ version = "1.1.0"
638
661
  source = "registry+https://github.com/rust-lang/crates.io-index"
639
662
  checksum = "d29ab0c6d3fc0ee92fe66e2d99f700eab17a8d57d1c1d3b748380fb20baa78cd"
640
663
 
664
+ [[package]]
665
+ name = "seq-macro"
666
+ version = "0.3.5"
667
+ source = "registry+https://github.com/rust-lang/crates.io-index"
668
+ checksum = "a3f0bf26fd526d2a95683cd0f87bf103b8539e2ca1ef48ce002d67aad59aa0b4"
669
+
641
670
  [[package]]
642
671
  name = "serde"
643
672
  version = "1.0.159"
@@ -749,21 +778,21 @@ dependencies = [
749
778
 
750
779
  [[package]]
751
780
  name = "tokenizers"
752
- version = "0.3.3"
781
+ version = "0.4.1"
753
782
  dependencies = [
754
783
  "magnus",
755
784
  "onig",
756
785
  "serde",
757
- "tokenizers 0.13.3",
786
+ "tokenizers 0.14.0",
758
787
  ]
759
788
 
760
789
  [[package]]
761
790
  name = "tokenizers"
762
- version = "0.13.3"
791
+ version = "0.14.0"
763
792
  source = "registry+https://github.com/rust-lang/crates.io-index"
764
- checksum = "5cf49017523bf0bc01c9966f172c5f120bbb7b96cccd1708772dd42e767fb9f5"
793
+ checksum = "12b515a66453a4d68f03398054f7204fd0dde6b93d3f20ea90b08025ab49b499"
765
794
  dependencies = [
766
- "aho-corasick",
795
+ "aho-corasick 0.7.20",
767
796
  "derive_builder",
768
797
  "esaxx-rs",
769
798
  "getrandom",
@@ -1,20 +1,21 @@
1
1
  [package]
2
2
  name = "tokenizers"
3
- version = "0.3.3"
3
+ version = "0.4.1"
4
4
  license = "Apache-2.0"
5
5
  authors = ["Andrew Kane <andrew@ankane.org>"]
6
6
  edition = "2021"
7
+ rust-version = "1.62.0"
7
8
  publish = false
8
9
 
9
10
  [lib]
10
11
  crate-type = ["cdylib"]
11
12
 
12
13
  [dependencies]
13
- magnus = "0.5"
14
+ magnus = "0.6"
14
15
  onig = { version = "6", default-features = false }
15
16
  serde = { version = "1", features = ["rc", "derive"] }
16
17
 
17
18
  [dependencies.tokenizers]
18
- version = "=0.13.3" # also update in from_pretrained.rb
19
+ version = "=0.14.0" # also update in from_pretrained.rb
19
20
  default-features = false
20
21
  features = ["progressbar", "onig", "esaxx_fast"]
@@ -1,9 +1,9 @@
1
1
  use std::sync::{Arc, RwLock};
2
2
 
3
- use magnus::typed_data::DataTypeBuilder;
3
+ use magnus::value::Lazy;
4
4
  use magnus::{
5
- function, memoize, method, Class, DataType, DataTypeFunctions, Module, Object, RClass, RModule,
6
- TypedData,
5
+ data_type_builder, function, method, Class, DataType, DataTypeFunctions, Module, Object, RClass, RModule,
6
+ Ruby, TypedData,
7
7
  };
8
8
  use serde::{Deserialize, Serialize};
9
9
  use tk::decoders::bpe::BPEDecoder;
@@ -19,7 +19,7 @@ use tk::Decoder;
19
19
  use tk::normalizers::replace::Replace;
20
20
 
21
21
  use super::utils::*;
22
- use super::{RbError, RbResult};
22
+ use super::{DECODERS, RbError, RbResult};
23
23
 
24
24
  #[derive(DataTypeFunctions, Clone, Deserialize, Serialize)]
25
25
  pub struct RbDecoder {
@@ -260,74 +260,85 @@ impl Decoder for RbDecoderWrapper {
260
260
  }
261
261
 
262
262
  unsafe impl TypedData for RbDecoder {
263
- fn class() -> RClass {
264
- *memoize!(RClass: {
265
- let class: RClass = crate::decoders().const_get("Decoder").unwrap();
266
- class.undef_alloc_func();
267
- class
268
- })
263
+ fn class(ruby: &Ruby) -> RClass {
264
+ static CLASS: Lazy<RClass> = Lazy::new(|ruby| {
265
+ let class: RClass = ruby.get_inner(&DECODERS).const_get("Decoder").unwrap();
266
+ class.undef_default_alloc_func();
267
+ class
268
+ });
269
+ ruby.get_inner(&CLASS)
269
270
  }
270
271
 
271
272
  fn data_type() -> &'static DataType {
272
- memoize!(DataType: DataTypeBuilder::<RbDecoder>::new("Tokenizers::Decoders::Decoder").build())
273
- }
274
-
275
- fn class_for(value: &Self) -> RClass {
273
+ static DATA_TYPE: DataType = data_type_builder!(RbDecoder, "Tokenizers::Decoders::Decoder").build();
274
+ &DATA_TYPE
275
+ }
276
+
277
+ fn class_for(ruby: &Ruby, value: &Self) -> RClass {
278
+ static BPE_DECODER: Lazy<RClass> = Lazy::new(|ruby| {
279
+ let class: RClass = ruby.get_inner(&DECODERS).const_get("BPEDecoder").unwrap();
280
+ class.undef_default_alloc_func();
281
+ class
282
+ });
283
+ static BYTE_FALLBACK: Lazy<RClass> = Lazy::new(|ruby| {
284
+ let class: RClass = ruby.get_inner(&DECODERS).const_get("ByteFallback").unwrap();
285
+ class.undef_default_alloc_func();
286
+ class
287
+ });
288
+ static BYTE_LEVEL: Lazy<RClass> = Lazy::new(|ruby| {
289
+ let class: RClass = ruby.get_inner(&DECODERS).const_get("ByteLevel").unwrap();
290
+ class.undef_default_alloc_func();
291
+ class
292
+ });
293
+ static CTC: Lazy<RClass> = Lazy::new(|ruby| {
294
+ let class: RClass = ruby.get_inner(&DECODERS).const_get("CTC").unwrap();
295
+ class.undef_default_alloc_func();
296
+ class
297
+ });
298
+ static FUSE: Lazy<RClass> = Lazy::new(|ruby| {
299
+ let class: RClass = ruby.get_inner(&DECODERS).const_get("Fuse").unwrap();
300
+ class.undef_default_alloc_func();
301
+ class
302
+ });
303
+ static METASPACE: Lazy<RClass> = Lazy::new(|ruby| {
304
+ let class: RClass = ruby.get_inner(&DECODERS).const_get("Metaspace").unwrap();
305
+ class.undef_default_alloc_func();
306
+ class
307
+ });
308
+ static REPLACE: Lazy<RClass> = Lazy::new(|ruby| {
309
+ let class: RClass = ruby.get_inner(&DECODERS).const_get("Replace").unwrap();
310
+ class.undef_default_alloc_func();
311
+ class
312
+ });
313
+ static STRIP: Lazy<RClass> = Lazy::new(|ruby| {
314
+ let class: RClass = ruby.get_inner(&DECODERS).const_get("Strip").unwrap();
315
+ class.undef_default_alloc_func();
316
+ class
317
+ });
318
+ static WORD_PIECE: Lazy<RClass> = Lazy::new(|ruby| {
319
+ let class: RClass = ruby.get_inner(&DECODERS).const_get("WordPiece").unwrap();
320
+ class.undef_default_alloc_func();
321
+ class
322
+ });
276
323
  match &value.decoder {
277
324
  RbDecoderWrapper::Wrapped(inner) => match *inner.read().unwrap() {
278
- DecoderWrapper::BPE(_) => *memoize!(RClass: {
279
- let class: RClass = crate::decoders().const_get("BPEDecoder").unwrap();
280
- class.undef_alloc_func();
281
- class
282
- }),
283
- DecoderWrapper::ByteFallback(_) => *memoize!(RClass: {
284
- let class: RClass = crate::decoders().const_get("ByteFallback").unwrap();
285
- class.undef_alloc_func();
286
- class
287
- }),
288
- DecoderWrapper::ByteLevel(_) => *memoize!(RClass: {
289
- let class: RClass = crate::decoders().const_get("ByteLevel").unwrap();
290
- class.undef_alloc_func();
291
- class
292
- }),
293
- DecoderWrapper::CTC(_) => *memoize!(RClass: {
294
- let class: RClass = crate::decoders().const_get("CTC").unwrap();
295
- class.undef_alloc_func();
296
- class
297
- }),
298
- DecoderWrapper::Fuse(_) => *memoize!(RClass: {
299
- let class: RClass = crate::decoders().const_get("Fuse").unwrap();
300
- class.undef_alloc_func();
301
- class
302
- }),
303
- DecoderWrapper::Metaspace(_) => *memoize!(RClass: {
304
- let class: RClass = crate::decoders().const_get("Metaspace").unwrap();
305
- class.undef_alloc_func();
306
- class
307
- }),
308
- DecoderWrapper::Replace(_) => *memoize!(RClass: {
309
- let class: RClass = crate::decoders().const_get("Replace").unwrap();
310
- class.undef_alloc_func();
311
- class
312
- }),
313
- DecoderWrapper::Strip(_) => *memoize!(RClass: {
314
- let class: RClass = crate::decoders().const_get("Strip").unwrap();
315
- class.undef_alloc_func();
316
- class
317
- }),
318
- DecoderWrapper::WordPiece(_) => *memoize!(RClass: {
319
- let class: RClass = crate::decoders().const_get("WordPiece").unwrap();
320
- class.undef_alloc_func();
321
- class
322
- }),
325
+ DecoderWrapper::BPE(_) => ruby.get_inner(&BPE_DECODER),
326
+ DecoderWrapper::ByteFallback(_) => ruby.get_inner(&BYTE_FALLBACK),
327
+ DecoderWrapper::ByteLevel(_) => ruby.get_inner(&BYTE_LEVEL),
328
+ DecoderWrapper::CTC(_) => ruby.get_inner(&CTC),
329
+ DecoderWrapper::Fuse(_) => ruby.get_inner(&FUSE),
330
+ DecoderWrapper::Metaspace(_) => ruby.get_inner(&METASPACE),
331
+ DecoderWrapper::Replace(_) => ruby.get_inner(&REPLACE),
332
+ DecoderWrapper::Strip(_) => ruby.get_inner(&STRIP),
333
+ DecoderWrapper::WordPiece(_) => ruby.get_inner(&WORD_PIECE),
323
334
  _ => todo!(),
324
335
  },
325
336
  }
326
337
  }
327
338
  }
328
339
 
329
- pub fn decoders(module: &RModule) -> RbResult<()> {
330
- let decoder = module.define_class("Decoder", Default::default())?;
340
+ pub fn init_decoders(ruby: &Ruby, module: &RModule) -> RbResult<()> {
341
+ let decoder = module.define_class("Decoder", ruby.class_object())?;
331
342
 
332
343
  let class = module.define_class("BPEDecoder", decoder)?;
333
344
  class.define_singleton_method("_new", function!(RbBPEDecoder::new, 1))?;
@@ -1,6 +1,6 @@
1
- use magnus::{memoize, Error, ExceptionClass, Module};
1
+ use magnus::{prelude::*, value::Lazy, Error, ExceptionClass, Ruby};
2
2
 
3
- use super::module;
3
+ use super::TOKENIZERS;
4
4
 
5
5
  pub struct RbError {}
6
6
 
@@ -11,6 +11,8 @@ impl RbError {
11
11
  }
12
12
  }
13
13
 
14
+ static ERROR: Lazy<ExceptionClass> = Lazy::new(|ruby| ruby.get_inner(&TOKENIZERS).const_get("Error").unwrap());
15
+
14
16
  fn error() -> ExceptionClass {
15
- *memoize!(ExceptionClass: module().const_get("Error").unwrap())
17
+ Ruby::get().unwrap().get_inner(&ERROR)
16
18
  }
@@ -1,3 +1,5 @@
1
+ #![allow(clippy::new_ret_no_self)]
2
+
1
3
  extern crate tokenizers as tk;
2
4
 
3
5
  mod decoders;
@@ -16,43 +18,29 @@ use error::RbError;
16
18
  use tokenizer::RbTokenizer;
17
19
  use utils::RbRegex;
18
20
 
19
- use magnus::{define_module, function, memoize, method, prelude::*, Error, RModule};
21
+ use magnus::{function, method, prelude::*, value::Lazy, Error, RModule, Ruby};
20
22
 
21
23
  type RbResult<T> = Result<T, Error>;
22
24
 
23
- fn module() -> RModule {
24
- *memoize!(RModule: define_module("Tokenizers").unwrap())
25
- }
25
+ static TOKENIZERS: Lazy<RModule> = Lazy::new(|ruby| ruby.class_object().const_get("Tokenizers").unwrap());
26
26
 
27
- fn decoders() -> RModule {
28
- *memoize!(RModule: module().const_get("Decoders").unwrap())
29
- }
27
+ static DECODERS: Lazy<RModule> = Lazy::new(|ruby| ruby.get_inner(&TOKENIZERS).const_get("Decoders").unwrap());
30
28
 
31
- fn models() -> RModule {
32
- *memoize!(RModule: module().const_get("Models").unwrap())
33
- }
29
+ static MODELS: Lazy<RModule> = Lazy::new(|ruby| ruby.get_inner(&TOKENIZERS).const_get("Models").unwrap());
34
30
 
35
- fn normalizers() -> RModule {
36
- *memoize!(RModule: module().const_get("Normalizers").unwrap())
37
- }
31
+ static NORMALIZERS: Lazy<RModule> = Lazy::new(|ruby| ruby.get_inner(&TOKENIZERS).const_get("Normalizers").unwrap());
38
32
 
39
- fn pre_tokenizers() -> RModule {
40
- *memoize!(RModule: module().const_get("PreTokenizers").unwrap())
41
- }
33
+ static PRE_TOKENIZERS: Lazy<RModule> = Lazy::new(|ruby| ruby.get_inner(&TOKENIZERS).const_get("PreTokenizers").unwrap());
42
34
 
43
- fn processors() -> RModule {
44
- *memoize!(RModule: module().const_get("Processors").unwrap())
45
- }
35
+ static PROCESSORS: Lazy<RModule> = Lazy::new(|ruby| ruby.get_inner(&TOKENIZERS).const_get("Processors").unwrap());
46
36
 
47
- fn trainers() -> RModule {
48
- *memoize!(RModule: module().const_get("Trainers").unwrap())
49
- }
37
+ static TRAINERS: Lazy<RModule> = Lazy::new(|ruby| ruby.get_inner(&TOKENIZERS).const_get("Trainers").unwrap());
50
38
 
51
39
  #[magnus::init]
52
- fn init() -> RbResult<()> {
53
- let module = module();
40
+ fn init(ruby: &Ruby) -> RbResult<()> {
41
+ let module = ruby.define_module("Tokenizers")?;
54
42
 
55
- let class = module.define_class("Tokenizer", Default::default())?;
43
+ let class = module.define_class("Tokenizer", ruby.class_object())?;
56
44
  class.define_singleton_method("new", function!(RbTokenizer::from_model, 1))?;
57
45
  class.define_singleton_method("from_file", function!(RbTokenizer::from_file, 1))?;
58
46
  class.define_method(
@@ -86,7 +74,7 @@ fn init() -> RbResult<()> {
86
74
  class.define_method("_vocab_size", method!(RbTokenizer::vocab_size, 1))?;
87
75
  class.define_method("_to_s", method!(RbTokenizer::to_str, 1))?;
88
76
 
89
- let class = module.define_class("Encoding", Default::default())?;
77
+ let class = module.define_class("Encoding", ruby.class_object())?;
90
78
  class.define_method("n_sequences", method!(RbEncoding::n_sequences, 0))?;
91
79
  class.define_method("ids", method!(RbEncoding::ids, 0))?;
92
80
  class.define_method("tokens", method!(RbEncoding::tokens, 0))?;
@@ -111,7 +99,7 @@ fn init() -> RbResult<()> {
111
99
  class.define_method("_char_to_token", method!(RbEncoding::char_to_token, 2))?;
112
100
  class.define_method("_char_to_word", method!(RbEncoding::char_to_word, 2))?;
113
101
 
114
- let class = module.define_class("Regex", Default::default())?;
102
+ let class = module.define_class("Regex", ruby.class_object())?;
115
103
  class.define_singleton_method("new", function!(RbRegex::new, 1))?;
116
104
 
117
105
  let models = module.define_module("Models")?;
@@ -121,12 +109,12 @@ fn init() -> RbResult<()> {
121
109
  let normalizers = module.define_module("Normalizers")?;
122
110
  let trainers = module.define_module("Trainers")?;
123
111
 
124
- models::models(&models)?;
125
- pre_tokenizers::pre_tokenizers(&pre_tokenizers)?;
126
- decoders::decoders(&decoders)?;
127
- processors::processors(&processors)?;
128
- normalizers::normalizers(&normalizers)?;
129
- trainers::trainers(&trainers)?;
112
+ models::init_models(ruby, &models)?;
113
+ pre_tokenizers::init_pre_tokenizers(ruby, &pre_tokenizers)?;
114
+ decoders::init_decoders(ruby, &decoders)?;
115
+ processors::init_processors(ruby, &processors)?;
116
+ normalizers::init_normalizers(ruby, &normalizers)?;
117
+ trainers::init_trainers(ruby, &trainers)?;
130
118
 
131
119
  Ok(())
132
120
  }