tokenizers 0.3.2-x86_64-linux-musl

Sign up to get free protection for your applications and to get access to all the features.
Files changed (40) hide show
  1. checksums.yaml +7 -0
  2. data/CHANGELOG.md +56 -0
  3. data/Cargo.lock +873 -0
  4. data/Cargo.toml +5 -0
  5. data/LICENSE-THIRD-PARTY.txt +17286 -0
  6. data/LICENSE.txt +202 -0
  7. data/README.md +69 -0
  8. data/lib/tokenizers/2.7/tokenizers.so +0 -0
  9. data/lib/tokenizers/3.0/tokenizers.so +0 -0
  10. data/lib/tokenizers/3.1/tokenizers.so +0 -0
  11. data/lib/tokenizers/3.2/tokenizers.so +0 -0
  12. data/lib/tokenizers/char_bpe_tokenizer.rb +22 -0
  13. data/lib/tokenizers/decoders/bpe_decoder.rb +9 -0
  14. data/lib/tokenizers/decoders/ctc.rb +9 -0
  15. data/lib/tokenizers/decoders/metaspace.rb +9 -0
  16. data/lib/tokenizers/decoders/word_piece.rb +9 -0
  17. data/lib/tokenizers/encoding.rb +19 -0
  18. data/lib/tokenizers/from_pretrained.rb +119 -0
  19. data/lib/tokenizers/models/bpe.rb +9 -0
  20. data/lib/tokenizers/models/unigram.rb +9 -0
  21. data/lib/tokenizers/models/word_level.rb +13 -0
  22. data/lib/tokenizers/models/word_piece.rb +9 -0
  23. data/lib/tokenizers/normalizers/bert_normalizer.rb +9 -0
  24. data/lib/tokenizers/normalizers/strip.rb +9 -0
  25. data/lib/tokenizers/pre_tokenizers/byte_level.rb +9 -0
  26. data/lib/tokenizers/pre_tokenizers/digits.rb +9 -0
  27. data/lib/tokenizers/pre_tokenizers/metaspace.rb +9 -0
  28. data/lib/tokenizers/pre_tokenizers/punctuation.rb +9 -0
  29. data/lib/tokenizers/pre_tokenizers/split.rb +9 -0
  30. data/lib/tokenizers/processors/byte_level.rb +9 -0
  31. data/lib/tokenizers/processors/roberta_processing.rb +9 -0
  32. data/lib/tokenizers/processors/template_processing.rb +9 -0
  33. data/lib/tokenizers/tokenizer.rb +45 -0
  34. data/lib/tokenizers/trainers/bpe_trainer.rb +9 -0
  35. data/lib/tokenizers/trainers/unigram_trainer.rb +26 -0
  36. data/lib/tokenizers/trainers/word_level_trainer.rb +9 -0
  37. data/lib/tokenizers/trainers/word_piece_trainer.rb +26 -0
  38. data/lib/tokenizers/version.rb +3 -0
  39. data/lib/tokenizers.rb +59 -0
  40. metadata +83 -0
data/Cargo.lock ADDED
@@ -0,0 +1,873 @@
1
+ # This file is automatically @generated by Cargo.
2
+ # It is not intended for manual editing.
3
+ version = 3
4
+
5
+ [[package]]
6
+ name = "aho-corasick"
7
+ version = "0.7.20"
8
+ source = "registry+https://github.com/rust-lang/crates.io-index"
9
+ checksum = "cc936419f96fa211c1b9166887b38e5e40b19958e5b895be7c1f93adec7071ac"
10
+ dependencies = [
11
+ "memchr",
12
+ ]
13
+
14
+ [[package]]
15
+ name = "autocfg"
16
+ version = "1.1.0"
17
+ source = "registry+https://github.com/rust-lang/crates.io-index"
18
+ checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa"
19
+
20
+ [[package]]
21
+ name = "base64"
22
+ version = "0.13.1"
23
+ source = "registry+https://github.com/rust-lang/crates.io-index"
24
+ checksum = "9e1b586273c5702936fe7b7d6896644d8be71e6314cfe09d3167c95f712589e8"
25
+
26
+ [[package]]
27
+ name = "bindgen"
28
+ version = "0.60.1"
29
+ source = "registry+https://github.com/rust-lang/crates.io-index"
30
+ checksum = "062dddbc1ba4aca46de6338e2bf87771414c335f7b2f2036e8f3e9befebf88e6"
31
+ dependencies = [
32
+ "bitflags",
33
+ "cexpr",
34
+ "clang-sys",
35
+ "lazy_static",
36
+ "lazycell",
37
+ "peeking_take_while",
38
+ "proc-macro2",
39
+ "quote",
40
+ "regex",
41
+ "rustc-hash",
42
+ "shlex",
43
+ ]
44
+
45
+ [[package]]
46
+ name = "bitflags"
47
+ version = "1.3.2"
48
+ source = "registry+https://github.com/rust-lang/crates.io-index"
49
+ checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a"
50
+
51
+ [[package]]
52
+ name = "cc"
53
+ version = "1.0.79"
54
+ source = "registry+https://github.com/rust-lang/crates.io-index"
55
+ checksum = "50d30906286121d95be3d479533b458f87493b30a4b5f79a607db8f5d11aa91f"
56
+
57
+ [[package]]
58
+ name = "cexpr"
59
+ version = "0.6.0"
60
+ source = "registry+https://github.com/rust-lang/crates.io-index"
61
+ checksum = "6fac387a98bb7c37292057cffc56d62ecb629900026402633ae9160df93a8766"
62
+ dependencies = [
63
+ "nom",
64
+ ]
65
+
66
+ [[package]]
67
+ name = "cfg-if"
68
+ version = "1.0.0"
69
+ source = "registry+https://github.com/rust-lang/crates.io-index"
70
+ checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"
71
+
72
+ [[package]]
73
+ name = "clang-sys"
74
+ version = "1.4.0"
75
+ source = "registry+https://github.com/rust-lang/crates.io-index"
76
+ checksum = "fa2e27ae6ab525c3d369ded447057bca5438d86dc3a68f6faafb8269ba82ebf3"
77
+ dependencies = [
78
+ "glob",
79
+ "libc",
80
+ "libloading",
81
+ ]
82
+
83
+ [[package]]
84
+ name = "console"
85
+ version = "0.15.5"
86
+ source = "registry+https://github.com/rust-lang/crates.io-index"
87
+ checksum = "c3d79fbe8970a77e3e34151cc13d3b3e248aa0faaecb9f6091fa07ebefe5ad60"
88
+ dependencies = [
89
+ "encode_unicode",
90
+ "lazy_static",
91
+ "libc",
92
+ "unicode-width",
93
+ "windows-sys",
94
+ ]
95
+
96
+ [[package]]
97
+ name = "crossbeam-channel"
98
+ version = "0.5.6"
99
+ source = "registry+https://github.com/rust-lang/crates.io-index"
100
+ checksum = "c2dd04ddaf88237dc3b8d8f9a3c1004b506b54b3313403944054d23c0870c521"
101
+ dependencies = [
102
+ "cfg-if",
103
+ "crossbeam-utils",
104
+ ]
105
+
106
+ [[package]]
107
+ name = "crossbeam-deque"
108
+ version = "0.8.2"
109
+ source = "registry+https://github.com/rust-lang/crates.io-index"
110
+ checksum = "715e8152b692bba2d374b53d4875445368fdf21a94751410af607a5ac677d1fc"
111
+ dependencies = [
112
+ "cfg-if",
113
+ "crossbeam-epoch",
114
+ "crossbeam-utils",
115
+ ]
116
+
117
+ [[package]]
118
+ name = "crossbeam-epoch"
119
+ version = "0.9.13"
120
+ source = "registry+https://github.com/rust-lang/crates.io-index"
121
+ checksum = "01a9af1f4c2ef74bb8aa1f7e19706bc72d03598c8a570bb5de72243c7a9d9d5a"
122
+ dependencies = [
123
+ "autocfg",
124
+ "cfg-if",
125
+ "crossbeam-utils",
126
+ "memoffset",
127
+ "scopeguard",
128
+ ]
129
+
130
+ [[package]]
131
+ name = "crossbeam-utils"
132
+ version = "0.8.14"
133
+ source = "registry+https://github.com/rust-lang/crates.io-index"
134
+ checksum = "4fb766fa798726286dbbb842f174001dab8abc7b627a1dd86e0b7222a95d929f"
135
+ dependencies = [
136
+ "cfg-if",
137
+ ]
138
+
139
+ [[package]]
140
+ name = "darling"
141
+ version = "0.14.3"
142
+ source = "registry+https://github.com/rust-lang/crates.io-index"
143
+ checksum = "c0808e1bd8671fb44a113a14e13497557533369847788fa2ae912b6ebfce9fa8"
144
+ dependencies = [
145
+ "darling_core",
146
+ "darling_macro",
147
+ ]
148
+
149
+ [[package]]
150
+ name = "darling_core"
151
+ version = "0.14.3"
152
+ source = "registry+https://github.com/rust-lang/crates.io-index"
153
+ checksum = "001d80444f28e193f30c2f293455da62dcf9a6b29918a4253152ae2b1de592cb"
154
+ dependencies = [
155
+ "fnv",
156
+ "ident_case",
157
+ "proc-macro2",
158
+ "quote",
159
+ "strsim",
160
+ "syn",
161
+ ]
162
+
163
+ [[package]]
164
+ name = "darling_macro"
165
+ version = "0.14.3"
166
+ source = "registry+https://github.com/rust-lang/crates.io-index"
167
+ checksum = "b36230598a2d5de7ec1c6f51f72d8a99a9208daff41de2084d06e3fd3ea56685"
168
+ dependencies = [
169
+ "darling_core",
170
+ "quote",
171
+ "syn",
172
+ ]
173
+
174
+ [[package]]
175
+ name = "derive_builder"
176
+ version = "0.12.0"
177
+ source = "registry+https://github.com/rust-lang/crates.io-index"
178
+ checksum = "8d67778784b508018359cbc8696edb3db78160bab2c2a28ba7f56ef6932997f8"
179
+ dependencies = [
180
+ "derive_builder_macro",
181
+ ]
182
+
183
+ [[package]]
184
+ name = "derive_builder_core"
185
+ version = "0.12.0"
186
+ source = "registry+https://github.com/rust-lang/crates.io-index"
187
+ checksum = "c11bdc11a0c47bc7d37d582b5285da6849c96681023680b906673c5707af7b0f"
188
+ dependencies = [
189
+ "darling",
190
+ "proc-macro2",
191
+ "quote",
192
+ "syn",
193
+ ]
194
+
195
+ [[package]]
196
+ name = "derive_builder_macro"
197
+ version = "0.12.0"
198
+ source = "registry+https://github.com/rust-lang/crates.io-index"
199
+ checksum = "ebcda35c7a396850a55ffeac740804b40ffec779b98fffbb1738f4033f0ee79e"
200
+ dependencies = [
201
+ "derive_builder_core",
202
+ "syn",
203
+ ]
204
+
205
+ [[package]]
206
+ name = "either"
207
+ version = "1.8.1"
208
+ source = "registry+https://github.com/rust-lang/crates.io-index"
209
+ checksum = "7fcaabb2fef8c910e7f4c7ce9f67a1283a1715879a7c230ca9d6d1ae31f16d91"
210
+
211
+ [[package]]
212
+ name = "encode_unicode"
213
+ version = "0.3.6"
214
+ source = "registry+https://github.com/rust-lang/crates.io-index"
215
+ checksum = "a357d28ed41a50f9c765dbfe56cbc04a64e53e5fc58ba79fbc34c10ef3df831f"
216
+
217
+ [[package]]
218
+ name = "esaxx-rs"
219
+ version = "0.1.8"
220
+ source = "registry+https://github.com/rust-lang/crates.io-index"
221
+ checksum = "1f748b253ceca9fed5f42f8b5ceb3851e93102199bc25b64b65369f76e5c0a35"
222
+ dependencies = [
223
+ "cc",
224
+ ]
225
+
226
+ [[package]]
227
+ name = "fnv"
228
+ version = "1.0.7"
229
+ source = "registry+https://github.com/rust-lang/crates.io-index"
230
+ checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1"
231
+
232
+ [[package]]
233
+ name = "getrandom"
234
+ version = "0.2.8"
235
+ source = "registry+https://github.com/rust-lang/crates.io-index"
236
+ checksum = "c05aeb6a22b8f62540c194aac980f2115af067bfe15a0734d7277a768d396b31"
237
+ dependencies = [
238
+ "cfg-if",
239
+ "libc",
240
+ "wasi",
241
+ ]
242
+
243
+ [[package]]
244
+ name = "glob"
245
+ version = "0.3.1"
246
+ source = "registry+https://github.com/rust-lang/crates.io-index"
247
+ checksum = "d2fabcfbdc87f4758337ca535fb41a6d701b65693ce38287d856d1674551ec9b"
248
+
249
+ [[package]]
250
+ name = "hermit-abi"
251
+ version = "0.2.6"
252
+ source = "registry+https://github.com/rust-lang/crates.io-index"
253
+ checksum = "ee512640fe35acbfb4bb779db6f0d80704c2cacfa2e39b601ef3e3f47d1ae4c7"
254
+ dependencies = [
255
+ "libc",
256
+ ]
257
+
258
+ [[package]]
259
+ name = "ident_case"
260
+ version = "1.0.1"
261
+ source = "registry+https://github.com/rust-lang/crates.io-index"
262
+ checksum = "b9e0384b61958566e926dc50660321d12159025e767c18e043daf26b70104c39"
263
+
264
+ [[package]]
265
+ name = "indicatif"
266
+ version = "0.15.0"
267
+ source = "registry+https://github.com/rust-lang/crates.io-index"
268
+ checksum = "7baab56125e25686df467fe470785512329883aab42696d661247aca2a2896e4"
269
+ dependencies = [
270
+ "console",
271
+ "lazy_static",
272
+ "number_prefix",
273
+ "regex",
274
+ ]
275
+
276
+ [[package]]
277
+ name = "itertools"
278
+ version = "0.8.2"
279
+ source = "registry+https://github.com/rust-lang/crates.io-index"
280
+ checksum = "f56a2d0bc861f9165be4eb3442afd3c236d8a98afd426f65d92324ae1091a484"
281
+ dependencies = [
282
+ "either",
283
+ ]
284
+
285
+ [[package]]
286
+ name = "itertools"
287
+ version = "0.9.0"
288
+ source = "registry+https://github.com/rust-lang/crates.io-index"
289
+ checksum = "284f18f85651fe11e8a991b2adb42cb078325c996ed026d994719efcfca1d54b"
290
+ dependencies = [
291
+ "either",
292
+ ]
293
+
294
+ [[package]]
295
+ name = "itoa"
296
+ version = "1.0.5"
297
+ source = "registry+https://github.com/rust-lang/crates.io-index"
298
+ checksum = "fad582f4b9e86b6caa621cabeb0963332d92eea04729ab12892c2533951e6440"
299
+
300
+ [[package]]
301
+ name = "lazy_static"
302
+ version = "1.4.0"
303
+ source = "registry+https://github.com/rust-lang/crates.io-index"
304
+ checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646"
305
+
306
+ [[package]]
307
+ name = "lazycell"
308
+ version = "1.3.0"
309
+ source = "registry+https://github.com/rust-lang/crates.io-index"
310
+ checksum = "830d08ce1d1d941e6b30645f1a0eb5643013d835ce3779a5fc208261dbe10f55"
311
+
312
+ [[package]]
313
+ name = "libc"
314
+ version = "0.2.139"
315
+ source = "registry+https://github.com/rust-lang/crates.io-index"
316
+ checksum = "201de327520df007757c1f0adce6e827fe8562fbc28bfd9c15571c66ca1f5f79"
317
+
318
+ [[package]]
319
+ name = "libloading"
320
+ version = "0.7.4"
321
+ source = "registry+https://github.com/rust-lang/crates.io-index"
322
+ checksum = "b67380fd3b2fbe7527a606e18729d21c6f3951633d0500574c4dc22d2d638b9f"
323
+ dependencies = [
324
+ "cfg-if",
325
+ "winapi",
326
+ ]
327
+
328
+ [[package]]
329
+ name = "log"
330
+ version = "0.4.17"
331
+ source = "registry+https://github.com/rust-lang/crates.io-index"
332
+ checksum = "abb12e687cfb44aa40f41fc3978ef76448f9b6038cad6aef4259d3c095a2382e"
333
+ dependencies = [
334
+ "cfg-if",
335
+ ]
336
+
337
+ [[package]]
338
+ name = "macro_rules_attribute"
339
+ version = "0.1.3"
340
+ source = "registry+https://github.com/rust-lang/crates.io-index"
341
+ checksum = "cf0c9b980bf4f3a37fd7b1c066941dd1b1d0152ce6ee6e8fe8c49b9f6810d862"
342
+ dependencies = [
343
+ "macro_rules_attribute-proc_macro",
344
+ "paste",
345
+ ]
346
+
347
+ [[package]]
348
+ name = "macro_rules_attribute-proc_macro"
349
+ version = "0.1.3"
350
+ source = "registry+https://github.com/rust-lang/crates.io-index"
351
+ checksum = "58093314a45e00c77d5c508f76e77c3396afbbc0d01506e7fae47b018bac2b1d"
352
+
353
+ [[package]]
354
+ name = "magnus"
355
+ version = "0.5.0"
356
+ source = "registry+https://github.com/rust-lang/crates.io-index"
357
+ checksum = "af37419a942477f606d227d0e6e92f3b68458bfc68fec3bc2629df6a2c1ccdf9"
358
+ dependencies = [
359
+ "magnus-macros",
360
+ "rb-sys",
361
+ "rb-sys-env",
362
+ ]
363
+
364
+ [[package]]
365
+ name = "magnus-macros"
366
+ version = "0.4.0"
367
+ source = "registry+https://github.com/rust-lang/crates.io-index"
368
+ checksum = "85aa71c9891b2732ff1157e1860a1ee578459fd25811fd3d72cc6e32b3fbdfea"
369
+ dependencies = [
370
+ "proc-macro2",
371
+ "quote",
372
+ "syn",
373
+ ]
374
+
375
+ [[package]]
376
+ name = "memchr"
377
+ version = "2.5.0"
378
+ source = "registry+https://github.com/rust-lang/crates.io-index"
379
+ checksum = "2dffe52ecf27772e601905b7522cb4ef790d2cc203488bbd0e2fe85fcb74566d"
380
+
381
+ [[package]]
382
+ name = "memoffset"
383
+ version = "0.7.1"
384
+ source = "registry+https://github.com/rust-lang/crates.io-index"
385
+ checksum = "5de893c32cde5f383baa4c04c5d6dbdd735cfd4a794b0debdb2bb1b421da5ff4"
386
+ dependencies = [
387
+ "autocfg",
388
+ ]
389
+
390
+ [[package]]
391
+ name = "minimal-lexical"
392
+ version = "0.2.1"
393
+ source = "registry+https://github.com/rust-lang/crates.io-index"
394
+ checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a"
395
+
396
+ [[package]]
397
+ name = "nom"
398
+ version = "7.1.3"
399
+ source = "registry+https://github.com/rust-lang/crates.io-index"
400
+ checksum = "d273983c5a657a70a3e8f2a01329822f3b8c8172b73826411a55751e404a0a4a"
401
+ dependencies = [
402
+ "memchr",
403
+ "minimal-lexical",
404
+ ]
405
+
406
+ [[package]]
407
+ name = "num_cpus"
408
+ version = "1.15.0"
409
+ source = "registry+https://github.com/rust-lang/crates.io-index"
410
+ checksum = "0fac9e2da13b5eb447a6ce3d392f23a29d8694bff781bf03a16cd9ac8697593b"
411
+ dependencies = [
412
+ "hermit-abi",
413
+ "libc",
414
+ ]
415
+
416
+ [[package]]
417
+ name = "number_prefix"
418
+ version = "0.3.0"
419
+ source = "registry+https://github.com/rust-lang/crates.io-index"
420
+ checksum = "17b02fc0ff9a9e4b35b3342880f48e896ebf69f2967921fe8646bf5b7125956a"
421
+
422
+ [[package]]
423
+ name = "once_cell"
424
+ version = "1.17.0"
425
+ source = "registry+https://github.com/rust-lang/crates.io-index"
426
+ checksum = "6f61fba1741ea2b3d6a1e3178721804bb716a68a6aeba1149b5d52e3d464ea66"
427
+
428
+ [[package]]
429
+ name = "onig"
430
+ version = "6.4.0"
431
+ source = "registry+https://github.com/rust-lang/crates.io-index"
432
+ checksum = "8c4b31c8722ad9171c6d77d3557db078cab2bd50afcc9d09c8b315c59df8ca4f"
433
+ dependencies = [
434
+ "bitflags",
435
+ "libc",
436
+ "once_cell",
437
+ "onig_sys",
438
+ ]
439
+
440
+ [[package]]
441
+ name = "onig_sys"
442
+ version = "69.8.1"
443
+ source = "registry+https://github.com/rust-lang/crates.io-index"
444
+ checksum = "7b829e3d7e9cc74c7e315ee8edb185bf4190da5acde74afd7fc59c35b1f086e7"
445
+ dependencies = [
446
+ "cc",
447
+ "pkg-config",
448
+ ]
449
+
450
+ [[package]]
451
+ name = "paste"
452
+ version = "1.0.11"
453
+ source = "registry+https://github.com/rust-lang/crates.io-index"
454
+ checksum = "d01a5bd0424d00070b0098dd17ebca6f961a959dead1dbcbbbc1d1cd8d3deeba"
455
+
456
+ [[package]]
457
+ name = "peeking_take_while"
458
+ version = "0.1.2"
459
+ source = "registry+https://github.com/rust-lang/crates.io-index"
460
+ checksum = "19b17cddbe7ec3f8bc800887bab5e717348c95ea2ca0b1bf0837fb964dc67099"
461
+
462
+ [[package]]
463
+ name = "pkg-config"
464
+ version = "0.3.26"
465
+ source = "registry+https://github.com/rust-lang/crates.io-index"
466
+ checksum = "6ac9a59f73473f1b8d852421e59e64809f025994837ef743615c6d0c5b305160"
467
+
468
+ [[package]]
469
+ name = "ppv-lite86"
470
+ version = "0.2.17"
471
+ source = "registry+https://github.com/rust-lang/crates.io-index"
472
+ checksum = "5b40af805b3121feab8a3c29f04d8ad262fa8e0561883e7653e024ae4479e6de"
473
+
474
+ [[package]]
475
+ name = "proc-macro2"
476
+ version = "1.0.51"
477
+ source = "registry+https://github.com/rust-lang/crates.io-index"
478
+ checksum = "5d727cae5b39d21da60fa540906919ad737832fe0b1c165da3a34d6548c849d6"
479
+ dependencies = [
480
+ "unicode-ident",
481
+ ]
482
+
483
+ [[package]]
484
+ name = "quote"
485
+ version = "1.0.23"
486
+ source = "registry+https://github.com/rust-lang/crates.io-index"
487
+ checksum = "8856d8364d252a14d474036ea1358d63c9e6965c8e5c1885c18f73d70bff9c7b"
488
+ dependencies = [
489
+ "proc-macro2",
490
+ ]
491
+
492
+ [[package]]
493
+ name = "rand"
494
+ version = "0.8.5"
495
+ source = "registry+https://github.com/rust-lang/crates.io-index"
496
+ checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404"
497
+ dependencies = [
498
+ "libc",
499
+ "rand_chacha",
500
+ "rand_core",
501
+ ]
502
+
503
+ [[package]]
504
+ name = "rand_chacha"
505
+ version = "0.3.1"
506
+ source = "registry+https://github.com/rust-lang/crates.io-index"
507
+ checksum = "e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88"
508
+ dependencies = [
509
+ "ppv-lite86",
510
+ "rand_core",
511
+ ]
512
+
513
+ [[package]]
514
+ name = "rand_core"
515
+ version = "0.6.4"
516
+ source = "registry+https://github.com/rust-lang/crates.io-index"
517
+ checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c"
518
+ dependencies = [
519
+ "getrandom",
520
+ ]
521
+
522
+ [[package]]
523
+ name = "rayon"
524
+ version = "1.6.1"
525
+ source = "registry+https://github.com/rust-lang/crates.io-index"
526
+ checksum = "6db3a213adf02b3bcfd2d3846bb41cb22857d131789e01df434fb7e7bc0759b7"
527
+ dependencies = [
528
+ "either",
529
+ "rayon-core",
530
+ ]
531
+
532
+ [[package]]
533
+ name = "rayon-cond"
534
+ version = "0.1.0"
535
+ source = "registry+https://github.com/rust-lang/crates.io-index"
536
+ checksum = "fd1259362c9065e5ea39a789ef40b1e3fd934c94beb7b5ab3ac6629d3b5e7cb7"
537
+ dependencies = [
538
+ "either",
539
+ "itertools 0.8.2",
540
+ "rayon",
541
+ ]
542
+
543
+ [[package]]
544
+ name = "rayon-core"
545
+ version = "1.10.2"
546
+ source = "registry+https://github.com/rust-lang/crates.io-index"
547
+ checksum = "356a0625f1954f730c0201cdab48611198dc6ce21f4acff55089b5a78e6e835b"
548
+ dependencies = [
549
+ "crossbeam-channel",
550
+ "crossbeam-deque",
551
+ "crossbeam-utils",
552
+ "num_cpus",
553
+ ]
554
+
555
+ [[package]]
556
+ name = "rb-sys"
557
+ version = "0.9.65"
558
+ source = "registry+https://github.com/rust-lang/crates.io-index"
559
+ checksum = "e8fe617bad8e88fd7e5d6f432e35f09e5f94144dfb8e8ee4adde82fb920dc59b"
560
+ dependencies = [
561
+ "rb-sys-build",
562
+ ]
563
+
564
+ [[package]]
565
+ name = "rb-sys-build"
566
+ version = "0.9.65"
567
+ source = "registry+https://github.com/rust-lang/crates.io-index"
568
+ checksum = "007e63597f91c711cbb299e60fecbdb6f5ad4a066d6a20c81943893f1584c895"
569
+ dependencies = [
570
+ "bindgen",
571
+ "lazy_static",
572
+ "quote",
573
+ "regex",
574
+ "shell-words",
575
+ "syn",
576
+ ]
577
+
578
+ [[package]]
579
+ name = "rb-sys-env"
580
+ version = "0.1.2"
581
+ source = "registry+https://github.com/rust-lang/crates.io-index"
582
+ checksum = "a35802679f07360454b418a5d1735c89716bde01d35b1560fc953c1415a0b3bb"
583
+
584
+ [[package]]
585
+ name = "regex"
586
+ version = "1.7.1"
587
+ source = "registry+https://github.com/rust-lang/crates.io-index"
588
+ checksum = "48aaa5748ba571fb95cd2c85c09f629215d3a6ece942baa100950af03a34f733"
589
+ dependencies = [
590
+ "aho-corasick",
591
+ "memchr",
592
+ "regex-syntax",
593
+ ]
594
+
595
+ [[package]]
596
+ name = "regex-syntax"
597
+ version = "0.6.28"
598
+ source = "registry+https://github.com/rust-lang/crates.io-index"
599
+ checksum = "456c603be3e8d448b072f410900c09faf164fbce2d480456f50eea6e25f9c848"
600
+
601
+ [[package]]
602
+ name = "rustc-hash"
603
+ version = "1.1.0"
604
+ source = "registry+https://github.com/rust-lang/crates.io-index"
605
+ checksum = "08d43f7aa6b08d49f382cde6a7982047c3426db949b1424bc4b7ec9ae12c6ce2"
606
+
607
+ [[package]]
608
+ name = "ryu"
609
+ version = "1.0.12"
610
+ source = "registry+https://github.com/rust-lang/crates.io-index"
611
+ checksum = "7b4b9743ed687d4b4bcedf9ff5eaa7398495ae14e61cba0a295704edbc7decde"
612
+
613
+ [[package]]
614
+ name = "scopeguard"
615
+ version = "1.1.0"
616
+ source = "registry+https://github.com/rust-lang/crates.io-index"
617
+ checksum = "d29ab0c6d3fc0ee92fe66e2d99f700eab17a8d57d1c1d3b748380fb20baa78cd"
618
+
619
+ [[package]]
620
+ name = "serde"
621
+ version = "1.0.152"
622
+ source = "registry+https://github.com/rust-lang/crates.io-index"
623
+ checksum = "bb7d1f0d3021d347a83e556fc4683dea2ea09d87bccdf88ff5c12545d89d5efb"
624
+ dependencies = [
625
+ "serde_derive",
626
+ ]
627
+
628
+ [[package]]
629
+ name = "serde_derive"
630
+ version = "1.0.152"
631
+ source = "registry+https://github.com/rust-lang/crates.io-index"
632
+ checksum = "af487d118eecd09402d70a5d72551860e788df87b464af30e5ea6a38c75c541e"
633
+ dependencies = [
634
+ "proc-macro2",
635
+ "quote",
636
+ "syn",
637
+ ]
638
+
639
+ [[package]]
640
+ name = "serde_json"
641
+ version = "1.0.92"
642
+ source = "registry+https://github.com/rust-lang/crates.io-index"
643
+ checksum = "7434af0dc1cbd59268aa98b4c22c131c0584d2232f6fb166efb993e2832e896a"
644
+ dependencies = [
645
+ "itoa",
646
+ "ryu",
647
+ "serde",
648
+ ]
649
+
650
+ [[package]]
651
+ name = "shell-words"
652
+ version = "1.1.0"
653
+ source = "registry+https://github.com/rust-lang/crates.io-index"
654
+ checksum = "24188a676b6ae68c3b2cb3a01be17fbf7240ce009799bb56d5b1409051e78fde"
655
+
656
+ [[package]]
657
+ name = "shlex"
658
+ version = "1.1.0"
659
+ source = "registry+https://github.com/rust-lang/crates.io-index"
660
+ checksum = "43b2853a4d09f215c24cc5489c992ce46052d359b5109343cbafbf26bc62f8a3"
661
+
662
+ [[package]]
663
+ name = "smallvec"
664
+ version = "1.10.0"
665
+ source = "registry+https://github.com/rust-lang/crates.io-index"
666
+ checksum = "a507befe795404456341dfab10cef66ead4c041f62b8b11bbb92bffe5d0953e0"
667
+
668
+ [[package]]
669
+ name = "spm_precompiled"
670
+ version = "0.1.4"
671
+ source = "registry+https://github.com/rust-lang/crates.io-index"
672
+ checksum = "5851699c4033c63636f7ea4cf7b7c1f1bf06d0cc03cfb42e711de5a5c46cf326"
673
+ dependencies = [
674
+ "base64",
675
+ "nom",
676
+ "serde",
677
+ "unicode-segmentation",
678
+ ]
679
+
680
+ [[package]]
681
+ name = "strsim"
682
+ version = "0.10.0"
683
+ source = "registry+https://github.com/rust-lang/crates.io-index"
684
+ checksum = "73473c0e59e6d5812c5dfe2a064a6444949f089e20eec9a2e5506596494e4623"
685
+
686
+ [[package]]
687
+ name = "syn"
688
+ version = "1.0.107"
689
+ source = "registry+https://github.com/rust-lang/crates.io-index"
690
+ checksum = "1f4064b5b16e03ae50984a5a8ed5d4f8803e6bc1fd170a3cda91a1be4b18e3f5"
691
+ dependencies = [
692
+ "proc-macro2",
693
+ "quote",
694
+ "unicode-ident",
695
+ ]
696
+
697
+ [[package]]
698
+ name = "thiserror"
699
+ version = "1.0.38"
700
+ source = "registry+https://github.com/rust-lang/crates.io-index"
701
+ checksum = "6a9cd18aa97d5c45c6603caea1da6628790b37f7a34b6ca89522331c5180fed0"
702
+ dependencies = [
703
+ "thiserror-impl",
704
+ ]
705
+
706
+ [[package]]
707
+ name = "thiserror-impl"
708
+ version = "1.0.38"
709
+ source = "registry+https://github.com/rust-lang/crates.io-index"
710
+ checksum = "1fb327af4685e4d03fa8cbcf1716380da910eeb2bb8be417e7f9fd3fb164f36f"
711
+ dependencies = [
712
+ "proc-macro2",
713
+ "quote",
714
+ "syn",
715
+ ]
716
+
717
+ [[package]]
718
+ name = "tokenizers"
719
+ version = "0.3.1"
720
+ dependencies = [
721
+ "magnus",
722
+ "onig",
723
+ "serde",
724
+ "tokenizers 0.13.2",
725
+ ]
726
+
727
+ [[package]]
728
+ name = "tokenizers"
729
+ version = "0.13.2"
730
+ source = "git+https://github.com/huggingface/tokenizers#fa66caf0abff16bae2213658ffa3e969c5445750"
731
+ dependencies = [
732
+ "aho-corasick",
733
+ "derive_builder",
734
+ "esaxx-rs",
735
+ "getrandom",
736
+ "indicatif",
737
+ "itertools 0.9.0",
738
+ "lazy_static",
739
+ "log",
740
+ "macro_rules_attribute",
741
+ "onig",
742
+ "paste",
743
+ "rand",
744
+ "rayon",
745
+ "rayon-cond",
746
+ "regex",
747
+ "regex-syntax",
748
+ "serde",
749
+ "serde_json",
750
+ "spm_precompiled",
751
+ "thiserror",
752
+ "unicode-normalization-alignments",
753
+ "unicode-segmentation",
754
+ "unicode_categories",
755
+ ]
756
+
757
+ [[package]]
758
+ name = "unicode-ident"
759
+ version = "1.0.6"
760
+ source = "registry+https://github.com/rust-lang/crates.io-index"
761
+ checksum = "84a22b9f218b40614adcb3f4ff08b703773ad44fa9423e4e0d346d5db86e4ebc"
762
+
763
+ [[package]]
764
+ name = "unicode-normalization-alignments"
765
+ version = "0.1.12"
766
+ source = "registry+https://github.com/rust-lang/crates.io-index"
767
+ checksum = "43f613e4fa046e69818dd287fdc4bc78175ff20331479dab6e1b0f98d57062de"
768
+ dependencies = [
769
+ "smallvec",
770
+ ]
771
+
772
+ [[package]]
773
+ name = "unicode-segmentation"
774
+ version = "1.10.1"
775
+ source = "registry+https://github.com/rust-lang/crates.io-index"
776
+ checksum = "1dd624098567895118886609431a7c3b8f516e41d30e0643f03d94592a147e36"
777
+
778
+ [[package]]
779
+ name = "unicode-width"
780
+ version = "0.1.10"
781
+ source = "registry+https://github.com/rust-lang/crates.io-index"
782
+ checksum = "c0edd1e5b14653f783770bce4a4dabb4a5108a5370a5f5d8cfe8710c361f6c8b"
783
+
784
+ [[package]]
785
+ name = "unicode_categories"
786
+ version = "0.1.1"
787
+ source = "registry+https://github.com/rust-lang/crates.io-index"
788
+ checksum = "39ec24b3121d976906ece63c9daad25b85969647682eee313cb5779fdd69e14e"
789
+
790
+ [[package]]
791
+ name = "wasi"
792
+ version = "0.11.0+wasi-snapshot-preview1"
793
+ source = "registry+https://github.com/rust-lang/crates.io-index"
794
+ checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423"
795
+
796
+ [[package]]
797
+ name = "winapi"
798
+ version = "0.3.9"
799
+ source = "registry+https://github.com/rust-lang/crates.io-index"
800
+ checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419"
801
+ dependencies = [
802
+ "winapi-i686-pc-windows-gnu",
803
+ "winapi-x86_64-pc-windows-gnu",
804
+ ]
805
+
806
+ [[package]]
807
+ name = "winapi-i686-pc-windows-gnu"
808
+ version = "0.4.0"
809
+ source = "registry+https://github.com/rust-lang/crates.io-index"
810
+ checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6"
811
+
812
+ [[package]]
813
+ name = "winapi-x86_64-pc-windows-gnu"
814
+ version = "0.4.0"
815
+ source = "registry+https://github.com/rust-lang/crates.io-index"
816
+ checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f"
817
+
818
+ [[package]]
819
+ name = "windows-sys"
820
+ version = "0.42.0"
821
+ source = "registry+https://github.com/rust-lang/crates.io-index"
822
+ checksum = "5a3e1820f08b8513f676f7ab6c1f99ff312fb97b553d30ff4dd86f9f15728aa7"
823
+ dependencies = [
824
+ "windows_aarch64_gnullvm",
825
+ "windows_aarch64_msvc",
826
+ "windows_i686_gnu",
827
+ "windows_i686_msvc",
828
+ "windows_x86_64_gnu",
829
+ "windows_x86_64_gnullvm",
830
+ "windows_x86_64_msvc",
831
+ ]
832
+
833
+ [[package]]
834
+ name = "windows_aarch64_gnullvm"
835
+ version = "0.42.1"
836
+ source = "registry+https://github.com/rust-lang/crates.io-index"
837
+ checksum = "8c9864e83243fdec7fc9c5444389dcbbfd258f745e7853198f365e3c4968a608"
838
+
839
+ [[package]]
840
+ name = "windows_aarch64_msvc"
841
+ version = "0.42.1"
842
+ source = "registry+https://github.com/rust-lang/crates.io-index"
843
+ checksum = "4c8b1b673ffc16c47a9ff48570a9d85e25d265735c503681332589af6253c6c7"
844
+
845
+ [[package]]
846
+ name = "windows_i686_gnu"
847
+ version = "0.42.1"
848
+ source = "registry+https://github.com/rust-lang/crates.io-index"
849
+ checksum = "de3887528ad530ba7bdbb1faa8275ec7a1155a45ffa57c37993960277145d640"
850
+
851
+ [[package]]
852
+ name = "windows_i686_msvc"
853
+ version = "0.42.1"
854
+ source = "registry+https://github.com/rust-lang/crates.io-index"
855
+ checksum = "bf4d1122317eddd6ff351aa852118a2418ad4214e6613a50e0191f7004372605"
856
+
857
+ [[package]]
858
+ name = "windows_x86_64_gnu"
859
+ version = "0.42.1"
860
+ source = "registry+https://github.com/rust-lang/crates.io-index"
861
+ checksum = "c1040f221285e17ebccbc2591ffdc2d44ee1f9186324dd3e84e99ac68d699c45"
862
+
863
+ [[package]]
864
+ name = "windows_x86_64_gnullvm"
865
+ version = "0.42.1"
866
+ source = "registry+https://github.com/rust-lang/crates.io-index"
867
+ checksum = "628bfdf232daa22b0d64fdb62b09fcc36bb01f05a3939e20ab73aaf9470d0463"
868
+
869
+ [[package]]
870
+ name = "windows_x86_64_msvc"
871
+ version = "0.42.1"
872
+ source = "registry+https://github.com/rust-lang/crates.io-index"
873
+ checksum = "447660ad36a13288b1db4d4248e857b510e8c3a225c822ba4fb748c0aafecffd"