sorbet-baml 0.1.0 → 0.3.0

This diff covers publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their public registries.
Files changed (83)
  1. checksums.yaml +4 -4
  2. data/CLAUDE.md +94 -0
  3. data/README.md +315 -122
  4. data/Rakefile +2 -2
  5. data/docs-site/.gitignore +48 -0
  6. data/docs-site/Gemfile +5 -0
  7. data/docs-site/Gemfile.lock +140 -0
  8. data/docs-site/Rakefile +3 -0
  9. data/docs-site/bridgetown.config.yml +15 -0
  10. data/docs-site/config/initializers.rb +9 -0
  11. data/docs-site/config/puma.rb +9 -0
  12. data/docs-site/config.ru +5 -0
  13. data/docs-site/esbuild.config.js +11 -0
  14. data/docs-site/frontend/javascript/index.js +22 -0
  15. data/docs-site/frontend/styles/index.css +61 -0
  16. data/docs-site/package.json +18 -0
  17. data/docs-site/postcss.config.js +6 -0
  18. data/docs-site/server/roda_app.rb +9 -0
  19. data/docs-site/src/_components/head.liquid +26 -0
  20. data/docs-site/src/_components/nav.liquid +68 -0
  21. data/docs-site/src/_layouts/default.liquid +27 -0
  22. data/docs-site/src/_layouts/doc.liquid +39 -0
  23. data/docs-site/src/advanced-usage.md +598 -0
  24. data/docs-site/src/getting-started.md +170 -0
  25. data/docs-site/src/index.md +183 -0
  26. data/docs-site/src/troubleshooting.md +317 -0
  27. data/docs-site/src/type-mapping.md +236 -0
  28. data/docs-site/tailwind.config.js +85 -0
  29. data/examples/description_parameters.rb +49 -0
  30. data/lib/sorbet_baml/comment_extractor.rb +51 -54
  31. data/lib/sorbet_baml/converter.rb +69 -35
  32. data/lib/sorbet_baml/dependency_resolver.rb +11 -11
  33. data/lib/sorbet_baml/description_extension.rb +34 -0
  34. data/lib/sorbet_baml/description_extractor.rb +34 -0
  35. data/lib/sorbet_baml/dspy_tool_converter.rb +97 -0
  36. data/lib/sorbet_baml/dspy_tool_extensions.rb +23 -0
  37. data/lib/sorbet_baml/enum_extensions.rb +2 -2
  38. data/lib/sorbet_baml/struct_extensions.rb +2 -2
  39. data/lib/sorbet_baml/tool_extensions.rb +23 -0
  40. data/lib/sorbet_baml/type_mapper.rb +35 -37
  41. data/lib/sorbet_baml/version.rb +1 -1
  42. data/lib/sorbet_baml.rb +41 -10
  43. data/sorbet/config +2 -0
  44. data/sorbet/rbi/gems/anthropic@1.5.0.rbi +21252 -0
  45. data/sorbet/rbi/gems/async@2.27.3.rbi +9 -0
  46. data/sorbet/rbi/gems/bigdecimal@3.2.2.rbi +9 -0
  47. data/sorbet/rbi/gems/concurrent-ruby@1.3.5.rbi +424 -0
  48. data/sorbet/rbi/gems/connection_pool@2.5.3.rbi +9 -0
  49. data/sorbet/rbi/gems/console@1.33.0.rbi +9 -0
  50. data/sorbet/rbi/gems/dry-configurable@1.3.0.rbi +672 -0
  51. data/sorbet/rbi/gems/dry-core@1.1.0.rbi +1729 -0
  52. data/sorbet/rbi/gems/dry-logger@1.1.0.rbi +1317 -0
  53. data/sorbet/rbi/gems/dspy@0.19.1.rbi +6677 -0
  54. data/sorbet/rbi/gems/ffi@1.17.2.rbi +2174 -0
  55. data/sorbet/rbi/gems/fiber-annotation@0.2.0.rbi +9 -0
  56. data/sorbet/rbi/gems/fiber-local@1.1.0.rbi +9 -0
  57. data/sorbet/rbi/gems/fiber-storage@1.0.1.rbi +9 -0
  58. data/sorbet/rbi/gems/google-protobuf@4.32.0.rbi +9 -0
  59. data/sorbet/rbi/gems/googleapis-common-protos-types@1.20.0.rbi +9 -0
  60. data/sorbet/rbi/gems/informers@1.2.1.rbi +1875 -0
  61. data/sorbet/rbi/gems/io-event@1.12.1.rbi +9 -0
  62. data/sorbet/rbi/gems/metrics@0.13.0.rbi +9 -0
  63. data/sorbet/rbi/gems/onnxruntime@0.10.0.rbi +304 -0
  64. data/sorbet/rbi/gems/openai@0.16.0.rbi +68055 -0
  65. data/sorbet/rbi/gems/opentelemetry-api@1.6.0.rbi +9 -0
  66. data/sorbet/rbi/gems/opentelemetry-common@0.22.0.rbi +9 -0
  67. data/sorbet/rbi/gems/opentelemetry-exporter-otlp@0.30.0.rbi +9 -0
  68. data/sorbet/rbi/gems/opentelemetry-registry@0.4.0.rbi +9 -0
  69. data/sorbet/rbi/gems/opentelemetry-sdk@1.8.1.rbi +9 -0
  70. data/sorbet/rbi/gems/opentelemetry-semantic_conventions@1.11.0.rbi +9 -0
  71. data/sorbet/rbi/gems/polars-df@0.20.0.rbi +9 -0
  72. data/sorbet/rbi/gems/sorbet-result@1.4.0.rbi +242 -0
  73. data/sorbet/rbi/gems/sorbet-schema@0.9.2.rbi +743 -0
  74. data/sorbet/rbi/gems/sorbet-struct-comparable@1.3.0.rbi +48 -0
  75. data/sorbet/rbi/gems/tokenizers@0.5.5.rbi +754 -0
  76. data/sorbet/rbi/gems/traces@0.17.0.rbi +9 -0
  77. data/sorbet/rbi/gems/zeitwerk@2.7.3.rbi +1429 -0
  78. metadata +67 -7
  79. data/docs/README.md +0 -117
  80. data/docs/advanced-usage.md +0 -427
  81. data/docs/getting-started.md +0 -91
  82. data/docs/troubleshooting.md +0 -291
  83. data/docs/type-mapping.md +0 -192
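Most of the library-level changes above extend the gem's conversion surface: description support (description_extension.rb, description_extractor.rb, examples/description_parameters.rb) and DSPy tool conversion (dspy_tool_converter.rb, dspy_tool_extensions.rb, tool_extensions.rb). As a rough orientation only, not taken from this diff, a sorbet-baml conversion has approximately the following shape; the to_baml entry point is suggested by struct_extensions.rb, and the exact method names and output may differ in the released gem:

require "sorbet-baml"

# Hypothetical sketch: convert a Sorbet struct into a BAML class definition.
# `to_baml` is assumed from lib/sorbet_baml/struct_extensions.rb.
class User < T::Struct
  const :name, String
  const :age, Integer
  const :email, T.nilable(String)
end

puts User.to_baml
# Roughly:
#   class User {
#     name string
#     age int
#     email string?
#   }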
data/sorbet/rbi/gems/tokenizers@0.5.5.rbi
@@ -0,0 +1,754 @@
+ # typed: true
+
+ # DO NOT EDIT MANUALLY
+ # This is an autogenerated file for types exported from the `tokenizers` gem.
+ # Please instead update this file by running `bin/tapioca gem tokenizers`.
+
+
+ # source://tokenizers//lib/tokenizers/decoders/bpe_decoder.rb#1
+ module Tokenizers
+   class << self
+     # source://tokenizers//lib/tokenizers.rb#59
+     def from_file(*_arg0, **_arg1, &_arg2); end
+
+     # source://tokenizers//lib/tokenizers.rb#55
+     def from_pretrained(*_arg0, **_arg1, &_arg2); end
+   end
+ end
+
+ # source://tokenizers//lib/tokenizers/added_token.rb#2
+ class Tokenizers::AddedToken
+   def content; end
+   def lstrip; end
+   def normalized; end
+   def rstrip; end
+   def single_word; end
+   def special; end
+
+   class << self
+     def _new(_arg0, _arg1); end
+
+     # source://tokenizers//lib/tokenizers/added_token.rb#3
+     def new(content, **kwargs); end
+   end
+ end
+
+ # source://tokenizers//lib/tokenizers/char_bpe_tokenizer.rb#2
+ class Tokenizers::CharBPETokenizer
+   # @return [CharBPETokenizer] a new instance of CharBPETokenizer
+   #
+   # source://tokenizers//lib/tokenizers/char_bpe_tokenizer.rb#3
+   def initialize(vocab, merges, unk_token: T.unsafe(nil), suffix: T.unsafe(nil)); end
+
+   # source://tokenizers//lib/tokenizers/char_bpe_tokenizer.rb#18
+   def decode(ids); end
+
+   # source://tokenizers//lib/tokenizers/char_bpe_tokenizer.rb#14
+   def encode(text, **options); end
+ end
+
+ # source://tokenizers//lib/tokenizers/decoders/bpe_decoder.rb#2
+ module Tokenizers::Decoders; end
+
+ # source://tokenizers//lib/tokenizers/decoders/bpe_decoder.rb#3
+ class Tokenizers::Decoders::BPEDecoder < ::Tokenizers::Decoders::Decoder
+   def suffix; end
+   def suffix=(_arg0); end
+
+   class << self
+     def _new(_arg0); end
+
+     # source://tokenizers//lib/tokenizers/decoders/bpe_decoder.rb#4
+     def new(suffix: T.unsafe(nil)); end
+   end
+ end
+
+ class Tokenizers::Decoders::ByteFallback < ::Tokenizers::Decoders::Decoder
+   class << self
+     def new; end
+   end
+ end
+
+ class Tokenizers::Decoders::ByteLevel < ::Tokenizers::Decoders::Decoder
+   class << self
+     def new; end
+   end
+ end
+
+ # source://tokenizers//lib/tokenizers/decoders/ctc.rb#3
+ class Tokenizers::Decoders::CTC < ::Tokenizers::Decoders::Decoder
+   def cleanup; end
+   def cleanup=(_arg0); end
+   def pad_token; end
+   def pad_token=(_arg0); end
+   def word_delimiter_token; end
+   def word_delimiter_token=(_arg0); end
+
+   class << self
+     def _new(_arg0, _arg1, _arg2); end
+
+     # source://tokenizers//lib/tokenizers/decoders/ctc.rb#4
+     def new(pad_token: T.unsafe(nil), word_delimiter_token: T.unsafe(nil), cleanup: T.unsafe(nil)); end
+   end
+ end
+
+ class Tokenizers::Decoders::Decoder
+   def decode(_arg0); end
+ end
+
+ class Tokenizers::Decoders::Fuse < ::Tokenizers::Decoders::Decoder
+   class << self
+     def new; end
+   end
+ end
+
+ # source://tokenizers//lib/tokenizers/decoders/metaspace.rb#3
+ class Tokenizers::Decoders::Metaspace < ::Tokenizers::Decoders::Decoder
+   def prepend_scheme; end
+   def prepend_scheme=(_arg0); end
+   def replacement; end
+   def replacement=(_arg0); end
+   def split; end
+   def split=(_arg0); end
+
+   class << self
+     def _new(_arg0, _arg1, _arg2); end
+
+     # source://tokenizers//lib/tokenizers/decoders/metaspace.rb#4
+     def new(replacement: T.unsafe(nil), prepend_scheme: T.unsafe(nil), split: T.unsafe(nil)); end
+   end
+ end
+
+ class Tokenizers::Decoders::Replace < ::Tokenizers::Decoders::Decoder
+   class << self
+     def new(_arg0, _arg1); end
+   end
+ end
+
+ # source://tokenizers//lib/tokenizers/decoders/strip.rb#3
+ class Tokenizers::Decoders::Strip < ::Tokenizers::Decoders::Decoder
+   def content; end
+   def content=(_arg0); end
+   def start; end
+   def start=(_arg0); end
+   def stop; end
+   def stop=(_arg0); end
+
+   class << self
+     def _new(_arg0, _arg1, _arg2); end
+
+     # source://tokenizers//lib/tokenizers/decoders/strip.rb#4
+     def new(content: T.unsafe(nil), start: T.unsafe(nil), stop: T.unsafe(nil)); end
+   end
+ end
+
+ # source://tokenizers//lib/tokenizers/decoders/word_piece.rb#3
+ class Tokenizers::Decoders::WordPiece < ::Tokenizers::Decoders::Decoder
+   def cleanup; end
+   def cleanup=(_arg0); end
+   def prefix; end
+   def prefix=(_arg0); end
+
+   class << self
+     def _new(_arg0, _arg1); end
+
+     # source://tokenizers//lib/tokenizers/decoders/word_piece.rb#4
+     def new(prefix: T.unsafe(nil), cleanup: T.unsafe(nil)); end
+   end
+ end
+
+ # source://tokenizers//lib/tokenizers/encoding.rb#2
+ class Tokenizers::Encoding
+   def _char_to_token(_arg0, _arg1); end
+   def _char_to_word(_arg0, _arg1); end
+   def _word_to_chars(_arg0, _arg1); end
+   def _word_to_tokens(_arg0, _arg1); end
+   def attention_mask; end
+
+   # source://tokenizers//lib/tokenizers/encoding.rb#11
+   def char_to_token(char_pos, sequence_index = T.unsafe(nil)); end
+
+   # source://tokenizers//lib/tokenizers/encoding.rb#15
+   def char_to_word(char_pos, sequence_index = T.unsafe(nil)); end
+
+   def ids; end
+   def n_sequences; end
+   def offsets; end
+   def overflowing; end
+   def sequence_ids; end
+   def special_tokens_mask; end
+   def token_to_chars(_arg0); end
+   def token_to_sequence(_arg0); end
+   def token_to_word(_arg0); end
+   def tokens; end
+   def type_ids; end
+   def word_ids; end
+
+   # source://tokenizers//lib/tokenizers/encoding.rb#7
+   def word_to_chars(word_index, sequence_index = T.unsafe(nil)); end
+
+   # source://tokenizers//lib/tokenizers/encoding.rb#3
+   def word_to_tokens(word_index, sequence_index = T.unsafe(nil)); end
+ end
+
+ # source://tokenizers//lib/tokenizers.rb#53
+ class Tokenizers::Error < ::StandardError; end
+
+ # source://tokenizers//lib/tokenizers/from_pretrained.rb#2
+ module Tokenizers::FromPretrained
+   # use Ruby for downloads
+   # this avoids the need to vendor OpenSSL on Linux
+   # and reduces the extension size by about half
+   #
+   # source://tokenizers//lib/tokenizers/from_pretrained.rb#9
+   def from_pretrained(identifier, revision: T.unsafe(nil), auth_token: T.unsafe(nil)); end
+
+   private
+
+   # source://tokenizers//lib/tokenizers/from_pretrained.rb#96
+   def cache_dir; end
+
+   # use same storage format as Rust version
+   # https://github.com/epwalsh/rust-cached-path
+   #
+   # source://tokenizers//lib/tokenizers/from_pretrained.rb#46
+   def cached_path(cache_dir, url, headers, options); end
+
+   # source://tokenizers//lib/tokenizers/from_pretrained.rb#115
+   def ensure_cache_dir; end
+
+   # @return [Boolean]
+   #
+   # source://tokenizers//lib/tokenizers/from_pretrained.rb#121
+   def mac?; end
+ end
+
+ # for user agent
+ #
+ # source://tokenizers//lib/tokenizers/from_pretrained.rb#4
+ Tokenizers::FromPretrained::TOKENIZERS_VERSION = T.let(T.unsafe(nil), String)
+
+ # source://tokenizers//lib/tokenizers/models/bpe.rb#2
+ module Tokenizers::Models; end
+
+ # source://tokenizers//lib/tokenizers/models/bpe.rb#3
+ class Tokenizers::Models::BPE < ::Tokenizers::Models::Model
+   def byte_fallback; end
+   def byte_fallback=(_arg0); end
+   def continuing_subword_prefix; end
+   def continuing_subword_prefix=(_arg0); end
+   def dropout; end
+   def dropout=(_arg0); end
+   def end_of_word_suffix; end
+   def end_of_word_suffix=(_arg0); end
+   def fuse_unk; end
+   def fuse_unk=(_arg0); end
+   def unk_token; end
+   def unk_token=(_arg0); end
+
+   class << self
+     def _from_file(_arg0, _arg1, _arg2); end
+     def _new(_arg0, _arg1, _arg2); end
+
+     # source://tokenizers//lib/tokenizers/models/bpe.rb#4
+     def new(vocab: T.unsafe(nil), merges: T.unsafe(nil), **kwargs); end
+   end
+ end
+
+ class Tokenizers::Models::Model; end
+
+ # source://tokenizers//lib/tokenizers/models/unigram.rb#3
+ class Tokenizers::Models::Unigram < ::Tokenizers::Models::Model
+   class << self
+     def _new(_arg0, _arg1, _arg2); end
+
+     # source://tokenizers//lib/tokenizers/models/unigram.rb#4
+     def new(vocab: T.unsafe(nil), unk_id: T.unsafe(nil), byte_fallback: T.unsafe(nil)); end
+   end
+ end
+
+ # source://tokenizers//lib/tokenizers/models/word_level.rb#3
+ class Tokenizers::Models::WordLevel < ::Tokenizers::Models::Model
+   def unk_token; end
+   def unk_token=(_arg0); end
+
+   class << self
+     def _from_file(_arg0, _arg1); end
+     def _new(_arg0, _arg1); end
+
+     # source://tokenizers//lib/tokenizers/models/word_level.rb#8
+     def from_file(vocab, unk_token: T.unsafe(nil)); end
+
+     # source://tokenizers//lib/tokenizers/models/word_level.rb#4
+     def new(vocab: T.unsafe(nil), unk_token: T.unsafe(nil)); end
+
+     def read_file(_arg0); end
+   end
+ end
+
+ # source://tokenizers//lib/tokenizers/models/word_piece.rb#3
+ class Tokenizers::Models::WordPiece < ::Tokenizers::Models::Model
+   def continuing_subword_prefix; end
+   def continuing_subword_prefix=(_arg0); end
+   def max_input_chars_per_word; end
+   def max_input_chars_per_word=(_arg0); end
+   def unk_token; end
+   def unk_token=(_arg0); end
+
+   class << self
+     def _from_file(_arg0, _arg1); end
+     def _new(_arg0, _arg1); end
+
+     # source://tokenizers//lib/tokenizers/models/word_piece.rb#4
+     def new(vocab: T.unsafe(nil), **kwargs); end
+   end
+ end
+
+ # source://tokenizers//lib/tokenizers/normalizers/bert_normalizer.rb#2
+ module Tokenizers::Normalizers; end
+
+ # source://tokenizers//lib/tokenizers/normalizers/bert_normalizer.rb#3
+ class Tokenizers::Normalizers::BertNormalizer < ::Tokenizers::Normalizers::Normalizer
+   def clean_text; end
+   def clean_text=(_arg0); end
+   def handle_chinese_chars; end
+   def handle_chinese_chars=(_arg0); end
+   def lowercase; end
+   def lowercase=(_arg0); end
+   def strip_accents; end
+   def strip_accents=(_arg0); end
+
+   class << self
+     def _new(_arg0, _arg1, _arg2, _arg3); end
+
+     # source://tokenizers//lib/tokenizers/normalizers/bert_normalizer.rb#4
+     def new(clean_text: T.unsafe(nil), handle_chinese_chars: T.unsafe(nil), strip_accents: T.unsafe(nil), lowercase: T.unsafe(nil)); end
+   end
+ end
+
+ class Tokenizers::Normalizers::Lowercase < ::Tokenizers::Normalizers::Normalizer
+   class << self
+     def new; end
+   end
+ end
+
+ class Tokenizers::Normalizers::NFC < ::Tokenizers::Normalizers::Normalizer
+   class << self
+     def new; end
+   end
+ end
+
+ class Tokenizers::Normalizers::NFD < ::Tokenizers::Normalizers::Normalizer
+   class << self
+     def new; end
+   end
+ end
+
+ class Tokenizers::Normalizers::NFKC < ::Tokenizers::Normalizers::Normalizer
+   class << self
+     def new; end
+   end
+ end
+
+ class Tokenizers::Normalizers::NFKD < ::Tokenizers::Normalizers::Normalizer
+   class << self
+     def new; end
+   end
+ end
+
+ class Tokenizers::Normalizers::Nmt < ::Tokenizers::Normalizers::Normalizer
+   class << self
+     def new; end
+   end
+ end
+
+ class Tokenizers::Normalizers::Normalizer
+   def normalize_str(_arg0); end
+ end
+
+ class Tokenizers::Normalizers::Precompiled < ::Tokenizers::Normalizers::Normalizer
+   class << self
+     def new(_arg0); end
+   end
+ end
+
+ # source://tokenizers//lib/tokenizers/normalizers/prepend.rb#3
+ class Tokenizers::Normalizers::Prepend < ::Tokenizers::Normalizers::Normalizer
+   def prepend; end
+   def prepend=(_arg0); end
+
+   class << self
+     def _new(_arg0); end
+
+     # source://tokenizers//lib/tokenizers/normalizers/prepend.rb#4
+     def new(prepend: T.unsafe(nil)); end
+   end
+ end
+
+ class Tokenizers::Normalizers::Replace < ::Tokenizers::Normalizers::Normalizer
+   class << self
+     def new(_arg0, _arg1); end
+   end
+ end
+
+ class Tokenizers::Normalizers::Sequence < ::Tokenizers::Normalizers::Normalizer
+   class << self
+     def new(_arg0); end
+   end
+ end
+
+ # source://tokenizers//lib/tokenizers/normalizers/strip.rb#3
+ class Tokenizers::Normalizers::Strip < ::Tokenizers::Normalizers::Normalizer
+   def left; end
+   def left=(_arg0); end
+   def right; end
+   def right=(_arg0); end
+
+   class << self
+     def _new(_arg0, _arg1); end
+
+     # source://tokenizers//lib/tokenizers/normalizers/strip.rb#4
+     def new(left: T.unsafe(nil), right: T.unsafe(nil)); end
+   end
+ end
+
+ class Tokenizers::Normalizers::StripAccents < ::Tokenizers::Normalizers::Normalizer
+   class << self
+     def new; end
+   end
+ end
+
+ # source://tokenizers//lib/tokenizers/pre_tokenizers/byte_level.rb#2
+ module Tokenizers::PreTokenizers; end
+
+ class Tokenizers::PreTokenizers::BertPreTokenizer < ::Tokenizers::PreTokenizers::PreTokenizer
+   class << self
+     def new; end
+   end
+ end
+
+ # source://tokenizers//lib/tokenizers/pre_tokenizers/byte_level.rb#3
+ class Tokenizers::PreTokenizers::ByteLevel < ::Tokenizers::PreTokenizers::PreTokenizer
+   def add_prefix_space; end
+   def add_prefix_space=(_arg0); end
+   def use_regex; end
+   def use_regex=(_arg0); end
+
+   class << self
+     def _new(_arg0, _arg1); end
+     def alphabet; end
+
+     # source://tokenizers//lib/tokenizers/pre_tokenizers/byte_level.rb#4
+     def new(add_prefix_space: T.unsafe(nil), use_regex: T.unsafe(nil)); end
+   end
+ end
+
+ class Tokenizers::PreTokenizers::CharDelimiterSplit < ::Tokenizers::PreTokenizers::PreTokenizer
+   def delimiter; end
+   def delimiter=(_arg0); end
+
+   class << self
+     def new(_arg0); end
+   end
+ end
+
+ # source://tokenizers//lib/tokenizers/pre_tokenizers/digits.rb#3
+ class Tokenizers::PreTokenizers::Digits < ::Tokenizers::PreTokenizers::PreTokenizer
+   def individual_digits; end
+   def individual_digits=(_arg0); end
+
+   class << self
+     def _new(_arg0); end
+
+     # source://tokenizers//lib/tokenizers/pre_tokenizers/digits.rb#4
+     def new(individual_digits: T.unsafe(nil)); end
+   end
+ end
+
+ # source://tokenizers//lib/tokenizers/pre_tokenizers/metaspace.rb#3
+ class Tokenizers::PreTokenizers::Metaspace < ::Tokenizers::PreTokenizers::PreTokenizer
+   def prepend_scheme; end
+   def prepend_scheme=(_arg0); end
+   def replacement; end
+   def replacement=(_arg0); end
+   def split; end
+   def split=(_arg0); end
+
+   class << self
+     def _new(_arg0, _arg1, _arg2); end
+
+     # source://tokenizers//lib/tokenizers/pre_tokenizers/metaspace.rb#4
+     def new(replacement: T.unsafe(nil), prepend_scheme: T.unsafe(nil), split: T.unsafe(nil)); end
+   end
+ end
+
+ class Tokenizers::PreTokenizers::PreTokenizer
+   def pre_tokenize_str(_arg0); end
+ end
+
+ # source://tokenizers//lib/tokenizers/pre_tokenizers/punctuation.rb#3
+ class Tokenizers::PreTokenizers::Punctuation < ::Tokenizers::PreTokenizers::PreTokenizer
+   class << self
+     def _new(_arg0); end
+
+     # source://tokenizers//lib/tokenizers/pre_tokenizers/punctuation.rb#4
+     def new(behavior: T.unsafe(nil)); end
+   end
+ end
+
+ class Tokenizers::PreTokenizers::Sequence < ::Tokenizers::PreTokenizers::PreTokenizer
+   class << self
+     def new(_arg0); end
+   end
+ end
+
+ # source://tokenizers//lib/tokenizers/pre_tokenizers/split.rb#3
+ class Tokenizers::PreTokenizers::Split < ::Tokenizers::PreTokenizers::PreTokenizer
+   class << self
+     def _new(_arg0, _arg1, _arg2); end
+
+     # source://tokenizers//lib/tokenizers/pre_tokenizers/split.rb#4
+     def new(pattern, behavior, invert: T.unsafe(nil)); end
+   end
+ end
+
+ class Tokenizers::PreTokenizers::UnicodeScripts < ::Tokenizers::PreTokenizers::PreTokenizer
+   class << self
+     def new; end
+   end
+ end
+
+ class Tokenizers::PreTokenizers::Whitespace < ::Tokenizers::PreTokenizers::PreTokenizer
+   class << self
+     def new; end
+   end
+ end
+
+ class Tokenizers::PreTokenizers::WhitespaceSplit < ::Tokenizers::PreTokenizers::PreTokenizer
+   class << self
+     def new; end
+   end
+ end
+
+ # source://tokenizers//lib/tokenizers/processors/byte_level.rb#2
+ module Tokenizers::Processors; end
+
+ class Tokenizers::Processors::BertProcessing < ::Tokenizers::Processors::PostProcessor
+   class << self
+     def new(_arg0, _arg1); end
+   end
+ end
+
+ # source://tokenizers//lib/tokenizers/processors/byte_level.rb#3
+ class Tokenizers::Processors::ByteLevel < ::Tokenizers::Processors::PostProcessor
+   class << self
+     def _new(_arg0); end
+
+     # source://tokenizers//lib/tokenizers/processors/byte_level.rb#4
+     def new(trim_offsets: T.unsafe(nil)); end
+   end
+ end
+
+ class Tokenizers::Processors::PostProcessor; end
+
+ # source://tokenizers//lib/tokenizers/processors/roberta_processing.rb#3
+ class Tokenizers::Processors::RobertaProcessing < ::Tokenizers::Processors::PostProcessor
+   class << self
+     def _new(_arg0, _arg1, _arg2, _arg3); end
+
+     # source://tokenizers//lib/tokenizers/processors/roberta_processing.rb#4
+     def new(sep, cls, trim_offsets: T.unsafe(nil), add_prefix_space: T.unsafe(nil)); end
+   end
+ end
+
+ # source://tokenizers//lib/tokenizers/processors/template_processing.rb#3
+ class Tokenizers::Processors::TemplateProcessing < ::Tokenizers::Processors::PostProcessor
+   class << self
+     def _new(_arg0, _arg1, _arg2); end
+
+     # source://tokenizers//lib/tokenizers/processors/template_processing.rb#4
+     def new(single: T.unsafe(nil), pair: T.unsafe(nil), special_tokens: T.unsafe(nil)); end
+   end
+ end
+
+ class Tokenizers::Regex
+   class << self
+     def new(_arg0); end
+   end
+ end
+
+ # source://tokenizers//lib/tokenizers/tokenizer.rb#2
+ class Tokenizers::Tokenizer
+   extend ::Tokenizers::FromPretrained
+
+   def _decode(_arg0, _arg1); end
+   def _decode_batch(_arg0, _arg1); end
+   def _enable_padding(_arg0); end
+   def _enable_truncation(_arg0, _arg1); end
+   def _encode(_arg0, _arg1, _arg2, _arg3); end
+   def _encode_batch(_arg0, _arg1, _arg2); end
+   def _save(_arg0, _arg1); end
+   def _to_s(_arg0); end
+   def _vocab(_arg0); end
+   def _vocab_size(_arg0); end
+   def add_special_tokens(_arg0); end
+   def add_tokens(_arg0); end
+   def added_tokens_decoder; end
+
+   # source://tokenizers//lib/tokenizers/tokenizer.rb#21
+   def decode(ids, skip_special_tokens: T.unsafe(nil)); end
+
+   # source://tokenizers//lib/tokenizers/tokenizer.rb#25
+   def decode_batch(sequences, skip_special_tokens: T.unsafe(nil)); end
+
+   def decoder; end
+   def decoder=(_arg0); end
+
+   # source://tokenizers//lib/tokenizers/tokenizer.rb#29
+   def enable_padding(**options); end
+
+   # source://tokenizers//lib/tokenizers/tokenizer.rb#33
+   def enable_truncation(max_length, **options); end
+
+   # source://tokenizers//lib/tokenizers/tokenizer.rb#13
+   def encode(sequence, pair = T.unsafe(nil), is_pretokenized: T.unsafe(nil), add_special_tokens: T.unsafe(nil)); end
+
+   # source://tokenizers//lib/tokenizers/tokenizer.rb#17
+   def encode_batch(input, is_pretokenized: T.unsafe(nil), add_special_tokens: T.unsafe(nil)); end
+
+   def id_to_token(_arg0); end
+   def model; end
+   def model=(_arg0); end
+   def no_padding; end
+   def no_truncation; end
+   def normalizer; end
+   def normalizer=(_arg0); end
+   def num_special_tokens_to_add(_arg0); end
+   def padding; end
+   def post_processor; end
+   def post_processor=(_arg0); end
+   def pre_tokenizer; end
+   def pre_tokenizer=(_arg0); end
+
+   # source://tokenizers//lib/tokenizers/tokenizer.rb#9
+   def save(path, pretty: T.unsafe(nil)); end
+
+   # source://tokenizers//lib/tokenizers/tokenizer.rb#5
+   def to_s(pretty: T.unsafe(nil)); end
+
+   def token_to_id(_arg0); end
+   def train(_arg0, _arg1); end
+   def truncation; end
+
+   # source://tokenizers//lib/tokenizers/tokenizer.rb#37
+   def vocab(with_added_tokens: T.unsafe(nil)); end
+
+   # source://tokenizers//lib/tokenizers/tokenizer.rb#41
+   def vocab_size(with_added_tokens: T.unsafe(nil)); end
+
+   class << self
+     def from_file(_arg0); end
+     def from_str(_arg0); end
+     def new(_arg0); end
+   end
+ end
+
+ # source://tokenizers//lib/tokenizers/trainers/bpe_trainer.rb#2
+ module Tokenizers::Trainers; end
+
+ # source://tokenizers//lib/tokenizers/trainers/bpe_trainer.rb#3
+ class Tokenizers::Trainers::BpeTrainer < ::Tokenizers::Trainers::Trainer
+   def continuing_subword_prefix; end
+   def continuing_subword_prefix=(_arg0); end
+   def end_of_word_suffix; end
+   def end_of_word_suffix=(_arg0); end
+   def initial_alphabet; end
+   def initial_alphabet=(_arg0); end
+   def limit_alphabet; end
+   def limit_alphabet=(_arg0); end
+   def min_frequency; end
+   def min_frequency=(_arg0); end
+   def show_progress; end
+   def show_progress=(_arg0); end
+   def special_tokens; end
+   def special_tokens=(_arg0); end
+   def vocab_size; end
+   def vocab_size=(_arg0); end
+
+   class << self
+     def _new(_arg0); end
+
+     # source://tokenizers//lib/tokenizers/trainers/bpe_trainer.rb#4
+     def new(**options); end
+   end
+ end
+
+ class Tokenizers::Trainers::Trainer; end
+
+ # source://tokenizers//lib/tokenizers/trainers/unigram_trainer.rb#3
+ class Tokenizers::Trainers::UnigramTrainer < ::Tokenizers::Trainers::Trainer
+   def initial_alphabet; end
+   def initial_alphabet=(_arg0); end
+   def show_progress; end
+   def show_progress=(_arg0); end
+   def special_tokens; end
+   def special_tokens=(_arg0); end
+   def vocab_size; end
+   def vocab_size=(_arg0); end
+
+   class << self
+     def _new(_arg0); end
+
+     # source://tokenizers//lib/tokenizers/trainers/unigram_trainer.rb#4
+     def new(vocab_size: T.unsafe(nil), show_progress: T.unsafe(nil), special_tokens: T.unsafe(nil), initial_alphabet: T.unsafe(nil), shrinking_factor: T.unsafe(nil), unk_token: T.unsafe(nil), max_piece_length: T.unsafe(nil), n_sub_iterations: T.unsafe(nil)); end
+   end
+ end
+
+ # source://tokenizers//lib/tokenizers/trainers/word_level_trainer.rb#3
+ class Tokenizers::Trainers::WordLevelTrainer < ::Tokenizers::Trainers::Trainer
+   def min_frequency; end
+   def min_frequency=(_arg0); end
+   def show_progress; end
+   def show_progress=(_arg0); end
+   def special_tokens; end
+   def special_tokens=(_arg0); end
+   def vocab_size; end
+   def vocab_size=(_arg0); end
+
+   class << self
+     def _new(_arg0); end
+
+     # source://tokenizers//lib/tokenizers/trainers/word_level_trainer.rb#4
+     def new(**options); end
+   end
+ end
+
+ # source://tokenizers//lib/tokenizers/trainers/word_piece_trainer.rb#3
+ class Tokenizers::Trainers::WordPieceTrainer < ::Tokenizers::Trainers::Trainer
+   def continuing_subword_prefix; end
+   def continuing_subword_prefix=(_arg0); end
+   def end_of_word_suffix; end
+   def end_of_word_suffix=(_arg0); end
+   def initial_alphabet; end
+   def initial_alphabet=(_arg0); end
+   def limit_alphabet; end
+   def limit_alphabet=(_arg0); end
+   def min_frequency; end
+   def min_frequency=(_arg0); end
+   def show_progress; end
+   def show_progress=(_arg0); end
+   def special_tokens; end
+   def special_tokens=(_arg0); end
+   def vocab_size; end
+   def vocab_size=(_arg0); end
+
+   class << self
+     def _new(_arg0); end
+
+     # source://tokenizers//lib/tokenizers/trainers/word_piece_trainer.rb#4
+     def new(vocab_size: T.unsafe(nil), min_frequency: T.unsafe(nil), show_progress: T.unsafe(nil), special_tokens: T.unsafe(nil), limit_alphabet: T.unsafe(nil), initial_alphabet: T.unsafe(nil), continuing_subword_prefix: T.unsafe(nil), end_of_word_suffix: T.unsafe(nil)); end
+   end
+ end
+
+ # source://tokenizers//lib/tokenizers/version.rb#2
+ Tokenizers::VERSION = T.let(T.unsafe(nil), String)
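
The RBI above declares only names and arities, but it mirrors the tokenizers gem's runtime API one-to-one. A minimal round-trip sketch under stated assumptions (network access for the pretrained download; "bert-base-cased" and "corpus.txt" are illustrative identifiers, not taken from this diff):

require "tokenizers"

# Tokenizers.from_pretrained (declared on the Tokenizers module above)
# downloads the tokenizer files, caches them locally, and returns a
# Tokenizers::Tokenizer.
tokenizer = Tokenizers.from_pretrained("bert-base-cased")

# encode returns a Tokenizers::Encoding exposing ids, tokens, offsets, etc.
encoding = tokenizer.encode("Hello, world!")
p encoding.tokens # subword strings, including special tokens such as [CLS]/[SEP]
p encoding.ids    # the corresponding vocabulary ids

# decode maps ids back to text; skip_special_tokens drops [CLS]/[SEP] and friends.
p tokenizer.decode(encoding.ids, skip_special_tokens: true)

# Training sketch, using Tokenizer#train(files, trainer) as declared above.
trainer = Tokenizers::Trainers::BpeTrainer.new(vocab_size: 5_000, special_tokens: ["[UNK]"])
bpe_tokenizer = Tokenizers::Tokenizer.new(Tokenizers::Models::BPE.new(unk_token: "[UNK]"))
bpe_tokenizer.pre_tokenizer = Tokenizers::PreTokenizers::Whitespace.new
bpe_tokenizer.train(["corpus.txt"], trainer)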