tokenizers 0.3.2-x86_64-linux-musl
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/CHANGELOG.md +56 -0
- data/Cargo.lock +873 -0
- data/Cargo.toml +5 -0
- data/LICENSE-THIRD-PARTY.txt +17286 -0
- data/LICENSE.txt +202 -0
- data/README.md +69 -0
- data/lib/tokenizers/2.7/tokenizers.so +0 -0
- data/lib/tokenizers/3.0/tokenizers.so +0 -0
- data/lib/tokenizers/3.1/tokenizers.so +0 -0
- data/lib/tokenizers/3.2/tokenizers.so +0 -0
- data/lib/tokenizers/char_bpe_tokenizer.rb +22 -0
- data/lib/tokenizers/decoders/bpe_decoder.rb +9 -0
- data/lib/tokenizers/decoders/ctc.rb +9 -0
- data/lib/tokenizers/decoders/metaspace.rb +9 -0
- data/lib/tokenizers/decoders/word_piece.rb +9 -0
- data/lib/tokenizers/encoding.rb +19 -0
- data/lib/tokenizers/from_pretrained.rb +119 -0
- data/lib/tokenizers/models/bpe.rb +9 -0
- data/lib/tokenizers/models/unigram.rb +9 -0
- data/lib/tokenizers/models/word_level.rb +13 -0
- data/lib/tokenizers/models/word_piece.rb +9 -0
- data/lib/tokenizers/normalizers/bert_normalizer.rb +9 -0
- data/lib/tokenizers/normalizers/strip.rb +9 -0
- data/lib/tokenizers/pre_tokenizers/byte_level.rb +9 -0
- data/lib/tokenizers/pre_tokenizers/digits.rb +9 -0
- data/lib/tokenizers/pre_tokenizers/metaspace.rb +9 -0
- data/lib/tokenizers/pre_tokenizers/punctuation.rb +9 -0
- data/lib/tokenizers/pre_tokenizers/split.rb +9 -0
- data/lib/tokenizers/processors/byte_level.rb +9 -0
- data/lib/tokenizers/processors/roberta_processing.rb +9 -0
- data/lib/tokenizers/processors/template_processing.rb +9 -0
- data/lib/tokenizers/tokenizer.rb +45 -0
- data/lib/tokenizers/trainers/bpe_trainer.rb +9 -0
- data/lib/tokenizers/trainers/unigram_trainer.rb +26 -0
- data/lib/tokenizers/trainers/word_level_trainer.rb +9 -0
- data/lib/tokenizers/trainers/word_piece_trainer.rb +26 -0
- data/lib/tokenizers/version.rb +3 -0
- data/lib/tokenizers.rb +59 -0
- metadata +83 -0
data/Cargo.lock
ADDED
@@ -0,0 +1,873 @@
|
|
1
|
+
# This file is automatically @generated by Cargo.
|
2
|
+
# It is not intended for manual editing.
|
3
|
+
version = 3
|
4
|
+
|
5
|
+
[[package]]
|
6
|
+
name = "aho-corasick"
|
7
|
+
version = "0.7.20"
|
8
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
9
|
+
checksum = "cc936419f96fa211c1b9166887b38e5e40b19958e5b895be7c1f93adec7071ac"
|
10
|
+
dependencies = [
|
11
|
+
"memchr",
|
12
|
+
]
|
13
|
+
|
14
|
+
[[package]]
|
15
|
+
name = "autocfg"
|
16
|
+
version = "1.1.0"
|
17
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
18
|
+
checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa"
|
19
|
+
|
20
|
+
[[package]]
|
21
|
+
name = "base64"
|
22
|
+
version = "0.13.1"
|
23
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
24
|
+
checksum = "9e1b586273c5702936fe7b7d6896644d8be71e6314cfe09d3167c95f712589e8"
|
25
|
+
|
26
|
+
[[package]]
|
27
|
+
name = "bindgen"
|
28
|
+
version = "0.60.1"
|
29
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
30
|
+
checksum = "062dddbc1ba4aca46de6338e2bf87771414c335f7b2f2036e8f3e9befebf88e6"
|
31
|
+
dependencies = [
|
32
|
+
"bitflags",
|
33
|
+
"cexpr",
|
34
|
+
"clang-sys",
|
35
|
+
"lazy_static",
|
36
|
+
"lazycell",
|
37
|
+
"peeking_take_while",
|
38
|
+
"proc-macro2",
|
39
|
+
"quote",
|
40
|
+
"regex",
|
41
|
+
"rustc-hash",
|
42
|
+
"shlex",
|
43
|
+
]
|
44
|
+
|
45
|
+
[[package]]
|
46
|
+
name = "bitflags"
|
47
|
+
version = "1.3.2"
|
48
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
49
|
+
checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a"
|
50
|
+
|
51
|
+
[[package]]
|
52
|
+
name = "cc"
|
53
|
+
version = "1.0.79"
|
54
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
55
|
+
checksum = "50d30906286121d95be3d479533b458f87493b30a4b5f79a607db8f5d11aa91f"
|
56
|
+
|
57
|
+
[[package]]
|
58
|
+
name = "cexpr"
|
59
|
+
version = "0.6.0"
|
60
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
61
|
+
checksum = "6fac387a98bb7c37292057cffc56d62ecb629900026402633ae9160df93a8766"
|
62
|
+
dependencies = [
|
63
|
+
"nom",
|
64
|
+
]
|
65
|
+
|
66
|
+
[[package]]
|
67
|
+
name = "cfg-if"
|
68
|
+
version = "1.0.0"
|
69
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
70
|
+
checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"
|
71
|
+
|
72
|
+
[[package]]
|
73
|
+
name = "clang-sys"
|
74
|
+
version = "1.4.0"
|
75
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
76
|
+
checksum = "fa2e27ae6ab525c3d369ded447057bca5438d86dc3a68f6faafb8269ba82ebf3"
|
77
|
+
dependencies = [
|
78
|
+
"glob",
|
79
|
+
"libc",
|
80
|
+
"libloading",
|
81
|
+
]
|
82
|
+
|
83
|
+
[[package]]
|
84
|
+
name = "console"
|
85
|
+
version = "0.15.5"
|
86
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
87
|
+
checksum = "c3d79fbe8970a77e3e34151cc13d3b3e248aa0faaecb9f6091fa07ebefe5ad60"
|
88
|
+
dependencies = [
|
89
|
+
"encode_unicode",
|
90
|
+
"lazy_static",
|
91
|
+
"libc",
|
92
|
+
"unicode-width",
|
93
|
+
"windows-sys",
|
94
|
+
]
|
95
|
+
|
96
|
+
[[package]]
|
97
|
+
name = "crossbeam-channel"
|
98
|
+
version = "0.5.6"
|
99
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
100
|
+
checksum = "c2dd04ddaf88237dc3b8d8f9a3c1004b506b54b3313403944054d23c0870c521"
|
101
|
+
dependencies = [
|
102
|
+
"cfg-if",
|
103
|
+
"crossbeam-utils",
|
104
|
+
]
|
105
|
+
|
106
|
+
[[package]]
|
107
|
+
name = "crossbeam-deque"
|
108
|
+
version = "0.8.2"
|
109
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
110
|
+
checksum = "715e8152b692bba2d374b53d4875445368fdf21a94751410af607a5ac677d1fc"
|
111
|
+
dependencies = [
|
112
|
+
"cfg-if",
|
113
|
+
"crossbeam-epoch",
|
114
|
+
"crossbeam-utils",
|
115
|
+
]
|
116
|
+
|
117
|
+
[[package]]
|
118
|
+
name = "crossbeam-epoch"
|
119
|
+
version = "0.9.13"
|
120
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
121
|
+
checksum = "01a9af1f4c2ef74bb8aa1f7e19706bc72d03598c8a570bb5de72243c7a9d9d5a"
|
122
|
+
dependencies = [
|
123
|
+
"autocfg",
|
124
|
+
"cfg-if",
|
125
|
+
"crossbeam-utils",
|
126
|
+
"memoffset",
|
127
|
+
"scopeguard",
|
128
|
+
]
|
129
|
+
|
130
|
+
[[package]]
|
131
|
+
name = "crossbeam-utils"
|
132
|
+
version = "0.8.14"
|
133
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
134
|
+
checksum = "4fb766fa798726286dbbb842f174001dab8abc7b627a1dd86e0b7222a95d929f"
|
135
|
+
dependencies = [
|
136
|
+
"cfg-if",
|
137
|
+
]
|
138
|
+
|
139
|
+
[[package]]
|
140
|
+
name = "darling"
|
141
|
+
version = "0.14.3"
|
142
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
143
|
+
checksum = "c0808e1bd8671fb44a113a14e13497557533369847788fa2ae912b6ebfce9fa8"
|
144
|
+
dependencies = [
|
145
|
+
"darling_core",
|
146
|
+
"darling_macro",
|
147
|
+
]
|
148
|
+
|
149
|
+
[[package]]
|
150
|
+
name = "darling_core"
|
151
|
+
version = "0.14.3"
|
152
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
153
|
+
checksum = "001d80444f28e193f30c2f293455da62dcf9a6b29918a4253152ae2b1de592cb"
|
154
|
+
dependencies = [
|
155
|
+
"fnv",
|
156
|
+
"ident_case",
|
157
|
+
"proc-macro2",
|
158
|
+
"quote",
|
159
|
+
"strsim",
|
160
|
+
"syn",
|
161
|
+
]
|
162
|
+
|
163
|
+
[[package]]
|
164
|
+
name = "darling_macro"
|
165
|
+
version = "0.14.3"
|
166
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
167
|
+
checksum = "b36230598a2d5de7ec1c6f51f72d8a99a9208daff41de2084d06e3fd3ea56685"
|
168
|
+
dependencies = [
|
169
|
+
"darling_core",
|
170
|
+
"quote",
|
171
|
+
"syn",
|
172
|
+
]
|
173
|
+
|
174
|
+
[[package]]
|
175
|
+
name = "derive_builder"
|
176
|
+
version = "0.12.0"
|
177
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
178
|
+
checksum = "8d67778784b508018359cbc8696edb3db78160bab2c2a28ba7f56ef6932997f8"
|
179
|
+
dependencies = [
|
180
|
+
"derive_builder_macro",
|
181
|
+
]
|
182
|
+
|
183
|
+
[[package]]
|
184
|
+
name = "derive_builder_core"
|
185
|
+
version = "0.12.0"
|
186
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
187
|
+
checksum = "c11bdc11a0c47bc7d37d582b5285da6849c96681023680b906673c5707af7b0f"
|
188
|
+
dependencies = [
|
189
|
+
"darling",
|
190
|
+
"proc-macro2",
|
191
|
+
"quote",
|
192
|
+
"syn",
|
193
|
+
]
|
194
|
+
|
195
|
+
[[package]]
|
196
|
+
name = "derive_builder_macro"
|
197
|
+
version = "0.12.0"
|
198
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
199
|
+
checksum = "ebcda35c7a396850a55ffeac740804b40ffec779b98fffbb1738f4033f0ee79e"
|
200
|
+
dependencies = [
|
201
|
+
"derive_builder_core",
|
202
|
+
"syn",
|
203
|
+
]
|
204
|
+
|
205
|
+
[[package]]
|
206
|
+
name = "either"
|
207
|
+
version = "1.8.1"
|
208
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
209
|
+
checksum = "7fcaabb2fef8c910e7f4c7ce9f67a1283a1715879a7c230ca9d6d1ae31f16d91"
|
210
|
+
|
211
|
+
[[package]]
|
212
|
+
name = "encode_unicode"
|
213
|
+
version = "0.3.6"
|
214
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
215
|
+
checksum = "a357d28ed41a50f9c765dbfe56cbc04a64e53e5fc58ba79fbc34c10ef3df831f"
|
216
|
+
|
217
|
+
[[package]]
|
218
|
+
name = "esaxx-rs"
|
219
|
+
version = "0.1.8"
|
220
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
221
|
+
checksum = "1f748b253ceca9fed5f42f8b5ceb3851e93102199bc25b64b65369f76e5c0a35"
|
222
|
+
dependencies = [
|
223
|
+
"cc",
|
224
|
+
]
|
225
|
+
|
226
|
+
[[package]]
|
227
|
+
name = "fnv"
|
228
|
+
version = "1.0.7"
|
229
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
230
|
+
checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1"
|
231
|
+
|
232
|
+
[[package]]
|
233
|
+
name = "getrandom"
|
234
|
+
version = "0.2.8"
|
235
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
236
|
+
checksum = "c05aeb6a22b8f62540c194aac980f2115af067bfe15a0734d7277a768d396b31"
|
237
|
+
dependencies = [
|
238
|
+
"cfg-if",
|
239
|
+
"libc",
|
240
|
+
"wasi",
|
241
|
+
]
|
242
|
+
|
243
|
+
[[package]]
|
244
|
+
name = "glob"
|
245
|
+
version = "0.3.1"
|
246
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
247
|
+
checksum = "d2fabcfbdc87f4758337ca535fb41a6d701b65693ce38287d856d1674551ec9b"
|
248
|
+
|
249
|
+
[[package]]
|
250
|
+
name = "hermit-abi"
|
251
|
+
version = "0.2.6"
|
252
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
253
|
+
checksum = "ee512640fe35acbfb4bb779db6f0d80704c2cacfa2e39b601ef3e3f47d1ae4c7"
|
254
|
+
dependencies = [
|
255
|
+
"libc",
|
256
|
+
]
|
257
|
+
|
258
|
+
[[package]]
|
259
|
+
name = "ident_case"
|
260
|
+
version = "1.0.1"
|
261
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
262
|
+
checksum = "b9e0384b61958566e926dc50660321d12159025e767c18e043daf26b70104c39"
|
263
|
+
|
264
|
+
[[package]]
|
265
|
+
name = "indicatif"
|
266
|
+
version = "0.15.0"
|
267
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
268
|
+
checksum = "7baab56125e25686df467fe470785512329883aab42696d661247aca2a2896e4"
|
269
|
+
dependencies = [
|
270
|
+
"console",
|
271
|
+
"lazy_static",
|
272
|
+
"number_prefix",
|
273
|
+
"regex",
|
274
|
+
]
|
275
|
+
|
276
|
+
[[package]]
|
277
|
+
name = "itertools"
|
278
|
+
version = "0.8.2"
|
279
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
280
|
+
checksum = "f56a2d0bc861f9165be4eb3442afd3c236d8a98afd426f65d92324ae1091a484"
|
281
|
+
dependencies = [
|
282
|
+
"either",
|
283
|
+
]
|
284
|
+
|
285
|
+
[[package]]
|
286
|
+
name = "itertools"
|
287
|
+
version = "0.9.0"
|
288
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
289
|
+
checksum = "284f18f85651fe11e8a991b2adb42cb078325c996ed026d994719efcfca1d54b"
|
290
|
+
dependencies = [
|
291
|
+
"either",
|
292
|
+
]
|
293
|
+
|
294
|
+
[[package]]
|
295
|
+
name = "itoa"
|
296
|
+
version = "1.0.5"
|
297
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
298
|
+
checksum = "fad582f4b9e86b6caa621cabeb0963332d92eea04729ab12892c2533951e6440"
|
299
|
+
|
300
|
+
[[package]]
|
301
|
+
name = "lazy_static"
|
302
|
+
version = "1.4.0"
|
303
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
304
|
+
checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646"
|
305
|
+
|
306
|
+
[[package]]
|
307
|
+
name = "lazycell"
|
308
|
+
version = "1.3.0"
|
309
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
310
|
+
checksum = "830d08ce1d1d941e6b30645f1a0eb5643013d835ce3779a5fc208261dbe10f55"
|
311
|
+
|
312
|
+
[[package]]
|
313
|
+
name = "libc"
|
314
|
+
version = "0.2.139"
|
315
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
316
|
+
checksum = "201de327520df007757c1f0adce6e827fe8562fbc28bfd9c15571c66ca1f5f79"
|
317
|
+
|
318
|
+
[[package]]
|
319
|
+
name = "libloading"
|
320
|
+
version = "0.7.4"
|
321
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
322
|
+
checksum = "b67380fd3b2fbe7527a606e18729d21c6f3951633d0500574c4dc22d2d638b9f"
|
323
|
+
dependencies = [
|
324
|
+
"cfg-if",
|
325
|
+
"winapi",
|
326
|
+
]
|
327
|
+
|
328
|
+
[[package]]
|
329
|
+
name = "log"
|
330
|
+
version = "0.4.17"
|
331
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
332
|
+
checksum = "abb12e687cfb44aa40f41fc3978ef76448f9b6038cad6aef4259d3c095a2382e"
|
333
|
+
dependencies = [
|
334
|
+
"cfg-if",
|
335
|
+
]
|
336
|
+
|
337
|
+
[[package]]
|
338
|
+
name = "macro_rules_attribute"
|
339
|
+
version = "0.1.3"
|
340
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
341
|
+
checksum = "cf0c9b980bf4f3a37fd7b1c066941dd1b1d0152ce6ee6e8fe8c49b9f6810d862"
|
342
|
+
dependencies = [
|
343
|
+
"macro_rules_attribute-proc_macro",
|
344
|
+
"paste",
|
345
|
+
]
|
346
|
+
|
347
|
+
[[package]]
|
348
|
+
name = "macro_rules_attribute-proc_macro"
|
349
|
+
version = "0.1.3"
|
350
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
351
|
+
checksum = "58093314a45e00c77d5c508f76e77c3396afbbc0d01506e7fae47b018bac2b1d"
|
352
|
+
|
353
|
+
[[package]]
|
354
|
+
name = "magnus"
|
355
|
+
version = "0.5.0"
|
356
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
357
|
+
checksum = "af37419a942477f606d227d0e6e92f3b68458bfc68fec3bc2629df6a2c1ccdf9"
|
358
|
+
dependencies = [
|
359
|
+
"magnus-macros",
|
360
|
+
"rb-sys",
|
361
|
+
"rb-sys-env",
|
362
|
+
]
|
363
|
+
|
364
|
+
[[package]]
|
365
|
+
name = "magnus-macros"
|
366
|
+
version = "0.4.0"
|
367
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
368
|
+
checksum = "85aa71c9891b2732ff1157e1860a1ee578459fd25811fd3d72cc6e32b3fbdfea"
|
369
|
+
dependencies = [
|
370
|
+
"proc-macro2",
|
371
|
+
"quote",
|
372
|
+
"syn",
|
373
|
+
]
|
374
|
+
|
375
|
+
[[package]]
|
376
|
+
name = "memchr"
|
377
|
+
version = "2.5.0"
|
378
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
379
|
+
checksum = "2dffe52ecf27772e601905b7522cb4ef790d2cc203488bbd0e2fe85fcb74566d"
|
380
|
+
|
381
|
+
[[package]]
|
382
|
+
name = "memoffset"
|
383
|
+
version = "0.7.1"
|
384
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
385
|
+
checksum = "5de893c32cde5f383baa4c04c5d6dbdd735cfd4a794b0debdb2bb1b421da5ff4"
|
386
|
+
dependencies = [
|
387
|
+
"autocfg",
|
388
|
+
]
|
389
|
+
|
390
|
+
[[package]]
|
391
|
+
name = "minimal-lexical"
|
392
|
+
version = "0.2.1"
|
393
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
394
|
+
checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a"
|
395
|
+
|
396
|
+
[[package]]
|
397
|
+
name = "nom"
|
398
|
+
version = "7.1.3"
|
399
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
400
|
+
checksum = "d273983c5a657a70a3e8f2a01329822f3b8c8172b73826411a55751e404a0a4a"
|
401
|
+
dependencies = [
|
402
|
+
"memchr",
|
403
|
+
"minimal-lexical",
|
404
|
+
]
|
405
|
+
|
406
|
+
[[package]]
|
407
|
+
name = "num_cpus"
|
408
|
+
version = "1.15.0"
|
409
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
410
|
+
checksum = "0fac9e2da13b5eb447a6ce3d392f23a29d8694bff781bf03a16cd9ac8697593b"
|
411
|
+
dependencies = [
|
412
|
+
"hermit-abi",
|
413
|
+
"libc",
|
414
|
+
]
|
415
|
+
|
416
|
+
[[package]]
|
417
|
+
name = "number_prefix"
|
418
|
+
version = "0.3.0"
|
419
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
420
|
+
checksum = "17b02fc0ff9a9e4b35b3342880f48e896ebf69f2967921fe8646bf5b7125956a"
|
421
|
+
|
422
|
+
[[package]]
|
423
|
+
name = "once_cell"
|
424
|
+
version = "1.17.0"
|
425
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
426
|
+
checksum = "6f61fba1741ea2b3d6a1e3178721804bb716a68a6aeba1149b5d52e3d464ea66"
|
427
|
+
|
428
|
+
[[package]]
|
429
|
+
name = "onig"
|
430
|
+
version = "6.4.0"
|
431
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
432
|
+
checksum = "8c4b31c8722ad9171c6d77d3557db078cab2bd50afcc9d09c8b315c59df8ca4f"
|
433
|
+
dependencies = [
|
434
|
+
"bitflags",
|
435
|
+
"libc",
|
436
|
+
"once_cell",
|
437
|
+
"onig_sys",
|
438
|
+
]
|
439
|
+
|
440
|
+
[[package]]
|
441
|
+
name = "onig_sys"
|
442
|
+
version = "69.8.1"
|
443
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
444
|
+
checksum = "7b829e3d7e9cc74c7e315ee8edb185bf4190da5acde74afd7fc59c35b1f086e7"
|
445
|
+
dependencies = [
|
446
|
+
"cc",
|
447
|
+
"pkg-config",
|
448
|
+
]
|
449
|
+
|
450
|
+
[[package]]
|
451
|
+
name = "paste"
|
452
|
+
version = "1.0.11"
|
453
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
454
|
+
checksum = "d01a5bd0424d00070b0098dd17ebca6f961a959dead1dbcbbbc1d1cd8d3deeba"
|
455
|
+
|
456
|
+
[[package]]
|
457
|
+
name = "peeking_take_while"
|
458
|
+
version = "0.1.2"
|
459
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
460
|
+
checksum = "19b17cddbe7ec3f8bc800887bab5e717348c95ea2ca0b1bf0837fb964dc67099"
|
461
|
+
|
462
|
+
[[package]]
|
463
|
+
name = "pkg-config"
|
464
|
+
version = "0.3.26"
|
465
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
466
|
+
checksum = "6ac9a59f73473f1b8d852421e59e64809f025994837ef743615c6d0c5b305160"
|
467
|
+
|
468
|
+
[[package]]
|
469
|
+
name = "ppv-lite86"
|
470
|
+
version = "0.2.17"
|
471
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
472
|
+
checksum = "5b40af805b3121feab8a3c29f04d8ad262fa8e0561883e7653e024ae4479e6de"
|
473
|
+
|
474
|
+
[[package]]
|
475
|
+
name = "proc-macro2"
|
476
|
+
version = "1.0.51"
|
477
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
478
|
+
checksum = "5d727cae5b39d21da60fa540906919ad737832fe0b1c165da3a34d6548c849d6"
|
479
|
+
dependencies = [
|
480
|
+
"unicode-ident",
|
481
|
+
]
|
482
|
+
|
483
|
+
[[package]]
|
484
|
+
name = "quote"
|
485
|
+
version = "1.0.23"
|
486
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
487
|
+
checksum = "8856d8364d252a14d474036ea1358d63c9e6965c8e5c1885c18f73d70bff9c7b"
|
488
|
+
dependencies = [
|
489
|
+
"proc-macro2",
|
490
|
+
]
|
491
|
+
|
492
|
+
[[package]]
|
493
|
+
name = "rand"
|
494
|
+
version = "0.8.5"
|
495
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
496
|
+
checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404"
|
497
|
+
dependencies = [
|
498
|
+
"libc",
|
499
|
+
"rand_chacha",
|
500
|
+
"rand_core",
|
501
|
+
]
|
502
|
+
|
503
|
+
[[package]]
|
504
|
+
name = "rand_chacha"
|
505
|
+
version = "0.3.1"
|
506
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
507
|
+
checksum = "e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88"
|
508
|
+
dependencies = [
|
509
|
+
"ppv-lite86",
|
510
|
+
"rand_core",
|
511
|
+
]
|
512
|
+
|
513
|
+
[[package]]
|
514
|
+
name = "rand_core"
|
515
|
+
version = "0.6.4"
|
516
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
517
|
+
checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c"
|
518
|
+
dependencies = [
|
519
|
+
"getrandom",
|
520
|
+
]
|
521
|
+
|
522
|
+
[[package]]
|
523
|
+
name = "rayon"
|
524
|
+
version = "1.6.1"
|
525
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
526
|
+
checksum = "6db3a213adf02b3bcfd2d3846bb41cb22857d131789e01df434fb7e7bc0759b7"
|
527
|
+
dependencies = [
|
528
|
+
"either",
|
529
|
+
"rayon-core",
|
530
|
+
]
|
531
|
+
|
532
|
+
[[package]]
|
533
|
+
name = "rayon-cond"
|
534
|
+
version = "0.1.0"
|
535
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
536
|
+
checksum = "fd1259362c9065e5ea39a789ef40b1e3fd934c94beb7b5ab3ac6629d3b5e7cb7"
|
537
|
+
dependencies = [
|
538
|
+
"either",
|
539
|
+
"itertools 0.8.2",
|
540
|
+
"rayon",
|
541
|
+
]
|
542
|
+
|
543
|
+
[[package]]
|
544
|
+
name = "rayon-core"
|
545
|
+
version = "1.10.2"
|
546
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
547
|
+
checksum = "356a0625f1954f730c0201cdab48611198dc6ce21f4acff55089b5a78e6e835b"
|
548
|
+
dependencies = [
|
549
|
+
"crossbeam-channel",
|
550
|
+
"crossbeam-deque",
|
551
|
+
"crossbeam-utils",
|
552
|
+
"num_cpus",
|
553
|
+
]
|
554
|
+
|
555
|
+
[[package]]
|
556
|
+
name = "rb-sys"
|
557
|
+
version = "0.9.65"
|
558
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
559
|
+
checksum = "e8fe617bad8e88fd7e5d6f432e35f09e5f94144dfb8e8ee4adde82fb920dc59b"
|
560
|
+
dependencies = [
|
561
|
+
"rb-sys-build",
|
562
|
+
]
|
563
|
+
|
564
|
+
[[package]]
|
565
|
+
name = "rb-sys-build"
|
566
|
+
version = "0.9.65"
|
567
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
568
|
+
checksum = "007e63597f91c711cbb299e60fecbdb6f5ad4a066d6a20c81943893f1584c895"
|
569
|
+
dependencies = [
|
570
|
+
"bindgen",
|
571
|
+
"lazy_static",
|
572
|
+
"quote",
|
573
|
+
"regex",
|
574
|
+
"shell-words",
|
575
|
+
"syn",
|
576
|
+
]
|
577
|
+
|
578
|
+
[[package]]
|
579
|
+
name = "rb-sys-env"
|
580
|
+
version = "0.1.2"
|
581
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
582
|
+
checksum = "a35802679f07360454b418a5d1735c89716bde01d35b1560fc953c1415a0b3bb"
|
583
|
+
|
584
|
+
[[package]]
|
585
|
+
name = "regex"
|
586
|
+
version = "1.7.1"
|
587
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
588
|
+
checksum = "48aaa5748ba571fb95cd2c85c09f629215d3a6ece942baa100950af03a34f733"
|
589
|
+
dependencies = [
|
590
|
+
"aho-corasick",
|
591
|
+
"memchr",
|
592
|
+
"regex-syntax",
|
593
|
+
]
|
594
|
+
|
595
|
+
[[package]]
|
596
|
+
name = "regex-syntax"
|
597
|
+
version = "0.6.28"
|
598
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
599
|
+
checksum = "456c603be3e8d448b072f410900c09faf164fbce2d480456f50eea6e25f9c848"
|
600
|
+
|
601
|
+
[[package]]
|
602
|
+
name = "rustc-hash"
|
603
|
+
version = "1.1.0"
|
604
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
605
|
+
checksum = "08d43f7aa6b08d49f382cde6a7982047c3426db949b1424bc4b7ec9ae12c6ce2"
|
606
|
+
|
607
|
+
[[package]]
|
608
|
+
name = "ryu"
|
609
|
+
version = "1.0.12"
|
610
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
611
|
+
checksum = "7b4b9743ed687d4b4bcedf9ff5eaa7398495ae14e61cba0a295704edbc7decde"
|
612
|
+
|
613
|
+
[[package]]
|
614
|
+
name = "scopeguard"
|
615
|
+
version = "1.1.0"
|
616
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
617
|
+
checksum = "d29ab0c6d3fc0ee92fe66e2d99f700eab17a8d57d1c1d3b748380fb20baa78cd"
|
618
|
+
|
619
|
+
[[package]]
|
620
|
+
name = "serde"
|
621
|
+
version = "1.0.152"
|
622
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
623
|
+
checksum = "bb7d1f0d3021d347a83e556fc4683dea2ea09d87bccdf88ff5c12545d89d5efb"
|
624
|
+
dependencies = [
|
625
|
+
"serde_derive",
|
626
|
+
]
|
627
|
+
|
628
|
+
[[package]]
|
629
|
+
name = "serde_derive"
|
630
|
+
version = "1.0.152"
|
631
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
632
|
+
checksum = "af487d118eecd09402d70a5d72551860e788df87b464af30e5ea6a38c75c541e"
|
633
|
+
dependencies = [
|
634
|
+
"proc-macro2",
|
635
|
+
"quote",
|
636
|
+
"syn",
|
637
|
+
]
|
638
|
+
|
639
|
+
[[package]]
|
640
|
+
name = "serde_json"
|
641
|
+
version = "1.0.92"
|
642
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
643
|
+
checksum = "7434af0dc1cbd59268aa98b4c22c131c0584d2232f6fb166efb993e2832e896a"
|
644
|
+
dependencies = [
|
645
|
+
"itoa",
|
646
|
+
"ryu",
|
647
|
+
"serde",
|
648
|
+
]
|
649
|
+
|
650
|
+
[[package]]
|
651
|
+
name = "shell-words"
|
652
|
+
version = "1.1.0"
|
653
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
654
|
+
checksum = "24188a676b6ae68c3b2cb3a01be17fbf7240ce009799bb56d5b1409051e78fde"
|
655
|
+
|
656
|
+
[[package]]
|
657
|
+
name = "shlex"
|
658
|
+
version = "1.1.0"
|
659
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
660
|
+
checksum = "43b2853a4d09f215c24cc5489c992ce46052d359b5109343cbafbf26bc62f8a3"
|
661
|
+
|
662
|
+
[[package]]
|
663
|
+
name = "smallvec"
|
664
|
+
version = "1.10.0"
|
665
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
666
|
+
checksum = "a507befe795404456341dfab10cef66ead4c041f62b8b11bbb92bffe5d0953e0"
|
667
|
+
|
668
|
+
[[package]]
|
669
|
+
name = "spm_precompiled"
|
670
|
+
version = "0.1.4"
|
671
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
672
|
+
checksum = "5851699c4033c63636f7ea4cf7b7c1f1bf06d0cc03cfb42e711de5a5c46cf326"
|
673
|
+
dependencies = [
|
674
|
+
"base64",
|
675
|
+
"nom",
|
676
|
+
"serde",
|
677
|
+
"unicode-segmentation",
|
678
|
+
]
|
679
|
+
|
680
|
+
[[package]]
|
681
|
+
name = "strsim"
|
682
|
+
version = "0.10.0"
|
683
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
684
|
+
checksum = "73473c0e59e6d5812c5dfe2a064a6444949f089e20eec9a2e5506596494e4623"
|
685
|
+
|
686
|
+
[[package]]
|
687
|
+
name = "syn"
|
688
|
+
version = "1.0.107"
|
689
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
690
|
+
checksum = "1f4064b5b16e03ae50984a5a8ed5d4f8803e6bc1fd170a3cda91a1be4b18e3f5"
|
691
|
+
dependencies = [
|
692
|
+
"proc-macro2",
|
693
|
+
"quote",
|
694
|
+
"unicode-ident",
|
695
|
+
]
|
696
|
+
|
697
|
+
[[package]]
|
698
|
+
name = "thiserror"
|
699
|
+
version = "1.0.38"
|
700
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
701
|
+
checksum = "6a9cd18aa97d5c45c6603caea1da6628790b37f7a34b6ca89522331c5180fed0"
|
702
|
+
dependencies = [
|
703
|
+
"thiserror-impl",
|
704
|
+
]
|
705
|
+
|
706
|
+
[[package]]
|
707
|
+
name = "thiserror-impl"
|
708
|
+
version = "1.0.38"
|
709
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
710
|
+
checksum = "1fb327af4685e4d03fa8cbcf1716380da910eeb2bb8be417e7f9fd3fb164f36f"
|
711
|
+
dependencies = [
|
712
|
+
"proc-macro2",
|
713
|
+
"quote",
|
714
|
+
"syn",
|
715
|
+
]
|
716
|
+
|
717
|
+
[[package]]
|
718
|
+
name = "tokenizers"
|
719
|
+
version = "0.3.1"
|
720
|
+
dependencies = [
|
721
|
+
"magnus",
|
722
|
+
"onig",
|
723
|
+
"serde",
|
724
|
+
"tokenizers 0.13.2",
|
725
|
+
]
|
726
|
+
|
727
|
+
[[package]]
|
728
|
+
name = "tokenizers"
|
729
|
+
version = "0.13.2"
|
730
|
+
source = "git+https://github.com/huggingface/tokenizers#fa66caf0abff16bae2213658ffa3e969c5445750"
|
731
|
+
dependencies = [
|
732
|
+
"aho-corasick",
|
733
|
+
"derive_builder",
|
734
|
+
"esaxx-rs",
|
735
|
+
"getrandom",
|
736
|
+
"indicatif",
|
737
|
+
"itertools 0.9.0",
|
738
|
+
"lazy_static",
|
739
|
+
"log",
|
740
|
+
"macro_rules_attribute",
|
741
|
+
"onig",
|
742
|
+
"paste",
|
743
|
+
"rand",
|
744
|
+
"rayon",
|
745
|
+
"rayon-cond",
|
746
|
+
"regex",
|
747
|
+
"regex-syntax",
|
748
|
+
"serde",
|
749
|
+
"serde_json",
|
750
|
+
"spm_precompiled",
|
751
|
+
"thiserror",
|
752
|
+
"unicode-normalization-alignments",
|
753
|
+
"unicode-segmentation",
|
754
|
+
"unicode_categories",
|
755
|
+
]
|
756
|
+
|
757
|
+
[[package]]
|
758
|
+
name = "unicode-ident"
|
759
|
+
version = "1.0.6"
|
760
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
761
|
+
checksum = "84a22b9f218b40614adcb3f4ff08b703773ad44fa9423e4e0d346d5db86e4ebc"
|
762
|
+
|
763
|
+
[[package]]
|
764
|
+
name = "unicode-normalization-alignments"
|
765
|
+
version = "0.1.12"
|
766
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
767
|
+
checksum = "43f613e4fa046e69818dd287fdc4bc78175ff20331479dab6e1b0f98d57062de"
|
768
|
+
dependencies = [
|
769
|
+
"smallvec",
|
770
|
+
]
|
771
|
+
|
772
|
+
[[package]]
|
773
|
+
name = "unicode-segmentation"
|
774
|
+
version = "1.10.1"
|
775
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
776
|
+
checksum = "1dd624098567895118886609431a7c3b8f516e41d30e0643f03d94592a147e36"
|
777
|
+
|
778
|
+
[[package]]
|
779
|
+
name = "unicode-width"
|
780
|
+
version = "0.1.10"
|
781
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
782
|
+
checksum = "c0edd1e5b14653f783770bce4a4dabb4a5108a5370a5f5d8cfe8710c361f6c8b"
|
783
|
+
|
784
|
+
[[package]]
|
785
|
+
name = "unicode_categories"
|
786
|
+
version = "0.1.1"
|
787
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
788
|
+
checksum = "39ec24b3121d976906ece63c9daad25b85969647682eee313cb5779fdd69e14e"
|
789
|
+
|
790
|
+
[[package]]
|
791
|
+
name = "wasi"
|
792
|
+
version = "0.11.0+wasi-snapshot-preview1"
|
793
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
794
|
+
checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423"
|
795
|
+
|
796
|
+
[[package]]
|
797
|
+
name = "winapi"
|
798
|
+
version = "0.3.9"
|
799
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
800
|
+
checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419"
|
801
|
+
dependencies = [
|
802
|
+
"winapi-i686-pc-windows-gnu",
|
803
|
+
"winapi-x86_64-pc-windows-gnu",
|
804
|
+
]
|
805
|
+
|
806
|
+
[[package]]
|
807
|
+
name = "winapi-i686-pc-windows-gnu"
|
808
|
+
version = "0.4.0"
|
809
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
810
|
+
checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6"
|
811
|
+
|
812
|
+
[[package]]
|
813
|
+
name = "winapi-x86_64-pc-windows-gnu"
|
814
|
+
version = "0.4.0"
|
815
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
816
|
+
checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f"
|
817
|
+
|
818
|
+
[[package]]
|
819
|
+
name = "windows-sys"
|
820
|
+
version = "0.42.0"
|
821
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
822
|
+
checksum = "5a3e1820f08b8513f676f7ab6c1f99ff312fb97b553d30ff4dd86f9f15728aa7"
|
823
|
+
dependencies = [
|
824
|
+
"windows_aarch64_gnullvm",
|
825
|
+
"windows_aarch64_msvc",
|
826
|
+
"windows_i686_gnu",
|
827
|
+
"windows_i686_msvc",
|
828
|
+
"windows_x86_64_gnu",
|
829
|
+
"windows_x86_64_gnullvm",
|
830
|
+
"windows_x86_64_msvc",
|
831
|
+
]
|
832
|
+
|
833
|
+
[[package]]
|
834
|
+
name = "windows_aarch64_gnullvm"
|
835
|
+
version = "0.42.1"
|
836
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
837
|
+
checksum = "8c9864e83243fdec7fc9c5444389dcbbfd258f745e7853198f365e3c4968a608"
|
838
|
+
|
839
|
+
[[package]]
|
840
|
+
name = "windows_aarch64_msvc"
|
841
|
+
version = "0.42.1"
|
842
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
843
|
+
checksum = "4c8b1b673ffc16c47a9ff48570a9d85e25d265735c503681332589af6253c6c7"
|
844
|
+
|
845
|
+
[[package]]
|
846
|
+
name = "windows_i686_gnu"
|
847
|
+
version = "0.42.1"
|
848
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
849
|
+
checksum = "de3887528ad530ba7bdbb1faa8275ec7a1155a45ffa57c37993960277145d640"
|
850
|
+
|
851
|
+
[[package]]
|
852
|
+
name = "windows_i686_msvc"
|
853
|
+
version = "0.42.1"
|
854
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
855
|
+
checksum = "bf4d1122317eddd6ff351aa852118a2418ad4214e6613a50e0191f7004372605"
|
856
|
+
|
857
|
+
[[package]]
|
858
|
+
name = "windows_x86_64_gnu"
|
859
|
+
version = "0.42.1"
|
860
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
861
|
+
checksum = "c1040f221285e17ebccbc2591ffdc2d44ee1f9186324dd3e84e99ac68d699c45"
|
862
|
+
|
863
|
+
[[package]]
|
864
|
+
name = "windows_x86_64_gnullvm"
|
865
|
+
version = "0.42.1"
|
866
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
867
|
+
checksum = "628bfdf232daa22b0d64fdb62b09fcc36bb01f05a3939e20ab73aaf9470d0463"
|
868
|
+
|
869
|
+
[[package]]
|
870
|
+
name = "windows_x86_64_msvc"
|
871
|
+
version = "0.42.1"
|
872
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
873
|
+
checksum = "447660ad36a13288b1db4d4248e857b510e8c3a225c822ba4fb748c0aafecffd"
|