deepdoctection 0.32__py3-none-any.whl → 0.34__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of deepdoctection might be problematic. Click here for more details.
- deepdoctection/__init__.py +8 -25
- deepdoctection/analyzer/dd.py +84 -71
- deepdoctection/dataflow/common.py +9 -5
- deepdoctection/dataflow/custom.py +5 -5
- deepdoctection/dataflow/custom_serialize.py +75 -18
- deepdoctection/dataflow/parallel_map.py +3 -3
- deepdoctection/dataflow/serialize.py +4 -4
- deepdoctection/dataflow/stats.py +3 -3
- deepdoctection/datapoint/annotation.py +78 -56
- deepdoctection/datapoint/box.py +7 -7
- deepdoctection/datapoint/convert.py +6 -6
- deepdoctection/datapoint/image.py +157 -75
- deepdoctection/datapoint/view.py +175 -151
- deepdoctection/datasets/adapter.py +30 -24
- deepdoctection/datasets/base.py +10 -10
- deepdoctection/datasets/dataflow_builder.py +3 -3
- deepdoctection/datasets/info.py +23 -25
- deepdoctection/datasets/instances/doclaynet.py +48 -49
- deepdoctection/datasets/instances/fintabnet.py +44 -45
- deepdoctection/datasets/instances/funsd.py +23 -23
- deepdoctection/datasets/instances/iiitar13k.py +8 -8
- deepdoctection/datasets/instances/layouttest.py +2 -2
- deepdoctection/datasets/instances/publaynet.py +3 -3
- deepdoctection/datasets/instances/pubtables1m.py +18 -18
- deepdoctection/datasets/instances/pubtabnet.py +30 -29
- deepdoctection/datasets/instances/rvlcdip.py +28 -29
- deepdoctection/datasets/instances/xfund.py +51 -30
- deepdoctection/datasets/save.py +6 -6
- deepdoctection/eval/accmetric.py +32 -33
- deepdoctection/eval/base.py +8 -9
- deepdoctection/eval/cocometric.py +13 -12
- deepdoctection/eval/eval.py +32 -26
- deepdoctection/eval/tedsmetric.py +16 -12
- deepdoctection/eval/tp_eval_callback.py +7 -16
- deepdoctection/extern/base.py +339 -134
- deepdoctection/extern/d2detect.py +69 -89
- deepdoctection/extern/deskew.py +11 -10
- deepdoctection/extern/doctrocr.py +81 -64
- deepdoctection/extern/fastlang.py +23 -16
- deepdoctection/extern/hfdetr.py +53 -38
- deepdoctection/extern/hflayoutlm.py +216 -155
- deepdoctection/extern/hflm.py +35 -30
- deepdoctection/extern/model.py +433 -255
- deepdoctection/extern/pdftext.py +15 -15
- deepdoctection/extern/pt/ptutils.py +4 -2
- deepdoctection/extern/tessocr.py +39 -38
- deepdoctection/extern/texocr.py +14 -16
- deepdoctection/extern/tp/tfutils.py +16 -2
- deepdoctection/extern/tp/tpcompat.py +11 -7
- deepdoctection/extern/tp/tpfrcnn/config/config.py +4 -4
- deepdoctection/extern/tp/tpfrcnn/modeling/backbone.py +1 -1
- deepdoctection/extern/tp/tpfrcnn/modeling/model_box.py +5 -5
- deepdoctection/extern/tp/tpfrcnn/modeling/model_fpn.py +6 -6
- deepdoctection/extern/tp/tpfrcnn/modeling/model_frcnn.py +4 -4
- deepdoctection/extern/tp/tpfrcnn/modeling/model_mrcnn.py +5 -3
- deepdoctection/extern/tp/tpfrcnn/preproc.py +5 -5
- deepdoctection/extern/tpdetect.py +40 -45
- deepdoctection/mapper/cats.py +36 -40
- deepdoctection/mapper/cocostruct.py +16 -12
- deepdoctection/mapper/d2struct.py +22 -22
- deepdoctection/mapper/hfstruct.py +7 -7
- deepdoctection/mapper/laylmstruct.py +22 -24
- deepdoctection/mapper/maputils.py +9 -10
- deepdoctection/mapper/match.py +33 -2
- deepdoctection/mapper/misc.py +6 -7
- deepdoctection/mapper/pascalstruct.py +4 -4
- deepdoctection/mapper/prodigystruct.py +6 -6
- deepdoctection/mapper/pubstruct.py +84 -92
- deepdoctection/mapper/tpstruct.py +3 -3
- deepdoctection/mapper/xfundstruct.py +33 -33
- deepdoctection/pipe/anngen.py +39 -14
- deepdoctection/pipe/base.py +68 -99
- deepdoctection/pipe/common.py +181 -85
- deepdoctection/pipe/concurrency.py +14 -10
- deepdoctection/pipe/doctectionpipe.py +24 -21
- deepdoctection/pipe/language.py +20 -25
- deepdoctection/pipe/layout.py +18 -16
- deepdoctection/pipe/lm.py +49 -47
- deepdoctection/pipe/order.py +63 -65
- deepdoctection/pipe/refine.py +102 -109
- deepdoctection/pipe/segment.py +157 -162
- deepdoctection/pipe/sub_layout.py +50 -40
- deepdoctection/pipe/text.py +37 -36
- deepdoctection/pipe/transform.py +19 -16
- deepdoctection/train/d2_frcnn_train.py +27 -25
- deepdoctection/train/hf_detr_train.py +22 -18
- deepdoctection/train/hf_layoutlm_train.py +49 -48
- deepdoctection/train/tp_frcnn_train.py +10 -11
- deepdoctection/utils/concurrency.py +1 -1
- deepdoctection/utils/context.py +13 -6
- deepdoctection/utils/develop.py +4 -4
- deepdoctection/utils/env_info.py +52 -14
- deepdoctection/utils/file_utils.py +6 -11
- deepdoctection/utils/fs.py +41 -14
- deepdoctection/utils/identifier.py +2 -2
- deepdoctection/utils/logger.py +15 -15
- deepdoctection/utils/metacfg.py +7 -7
- deepdoctection/utils/pdf_utils.py +39 -14
- deepdoctection/utils/settings.py +188 -182
- deepdoctection/utils/tqdm.py +1 -1
- deepdoctection/utils/transform.py +14 -9
- deepdoctection/utils/types.py +104 -0
- deepdoctection/utils/utils.py +7 -7
- deepdoctection/utils/viz.py +70 -69
- {deepdoctection-0.32.dist-info → deepdoctection-0.34.dist-info}/METADATA +7 -4
- deepdoctection-0.34.dist-info/RECORD +146 -0
- {deepdoctection-0.32.dist-info → deepdoctection-0.34.dist-info}/WHEEL +1 -1
- deepdoctection/utils/detection_types.py +0 -68
- deepdoctection-0.32.dist-info/RECORD +0 -146
- {deepdoctection-0.32.dist-info → deepdoctection-0.34.dist-info}/LICENSE +0 -0
- {deepdoctection-0.32.dist-info → deepdoctection-0.34.dist-info}/top_level.txt +0 -0
deepdoctection/extern/model.py
CHANGED
|
@@ -20,9 +20,8 @@ Module for ModelCatalog and ModelDownloadManager
|
|
|
20
20
|
"""
|
|
21
21
|
|
|
22
22
|
import os
|
|
23
|
-
from copy import copy
|
|
24
23
|
from dataclasses import asdict, dataclass, field
|
|
25
|
-
from typing import Any,
|
|
24
|
+
from typing import Any, Mapping, Optional, Union
|
|
26
25
|
|
|
27
26
|
import jsonlines
|
|
28
27
|
from huggingface_hub import cached_download, hf_hub_url # type: ignore
|
|
@@ -32,11 +31,12 @@ from termcolor import colored
|
|
|
32
31
|
from ..utils.fs import download, get_configs_dir_path, get_weights_dir_path
|
|
33
32
|
from ..utils.logger import LoggingRecord, log_once, logger
|
|
34
33
|
from ..utils.settings import CellType, Languages, LayoutType, ObjectTypes, get_type
|
|
34
|
+
from ..utils.types import PathLikeOrStr
|
|
35
35
|
|
|
36
36
|
__all__ = ["ModelCatalog", "ModelDownloadManager", "print_model_infos", "ModelProfile"]
|
|
37
37
|
|
|
38
38
|
|
|
39
|
-
@dataclass
|
|
39
|
+
@dataclass(frozen=True)
|
|
40
40
|
class ModelProfile:
|
|
41
41
|
"""
|
|
42
42
|
Class for model profile. Add for each model one ModelProfile to the ModelCatalog
|
|
@@ -45,25 +45,21 @@ class ModelProfile:
|
|
|
45
45
|
name: str
|
|
46
46
|
description: str
|
|
47
47
|
|
|
48
|
-
size:
|
|
48
|
+
size: list[int]
|
|
49
49
|
tp_model: bool = field(default=False)
|
|
50
50
|
config: Optional[str] = field(default=None)
|
|
51
51
|
preprocessor_config: Optional[str] = field(default=None)
|
|
52
52
|
hf_repo_id: Optional[str] = field(default=None)
|
|
53
53
|
hf_model_name: Optional[str] = field(default=None)
|
|
54
|
-
hf_config_file: Optional[
|
|
55
|
-
urls: Optional[
|
|
56
|
-
categories: Optional[
|
|
54
|
+
hf_config_file: Optional[list[str]] = field(default=None)
|
|
55
|
+
urls: Optional[list[str]] = field(default=None)
|
|
56
|
+
categories: Optional[Mapping[int, ObjectTypes]] = field(default=None)
|
|
57
|
+
categories_orig: Optional[Mapping[str, ObjectTypes]] = field(default=None)
|
|
57
58
|
dl_library: Optional[str] = field(default=None)
|
|
58
59
|
model_wrapper: Optional[str] = field(default=None)
|
|
59
60
|
architecture: Optional[str] = field(default=None)
|
|
60
61
|
|
|
61
|
-
def
|
|
62
|
-
"""updating categories to ObjectTypes. This might be necessary if we load a catalog from a file"""
|
|
63
|
-
if self.categories:
|
|
64
|
-
self.categories = {key: get_type(val) for key, val in self.categories.items()}
|
|
65
|
-
|
|
66
|
-
def as_dict(self) -> Dict[str, Any]:
|
|
62
|
+
def as_dict(self) -> dict[str, Any]:
|
|
67
63
|
"""
|
|
68
64
|
returns a dict of the dataclass
|
|
69
65
|
"""
|
|
@@ -94,7 +90,7 @@ class ModelCatalog:
|
|
|
94
90
|
ModelCatalog.get_full_path_configs("my_new_model")
|
|
95
91
|
"""
|
|
96
92
|
|
|
97
|
-
CATALOG:
|
|
93
|
+
CATALOG: dict[str, ModelProfile] = {
|
|
98
94
|
"layout/model-800000_inf_only.data-00000-of-00001": ModelProfile(
|
|
99
95
|
name="layout/model-800000_inf_only.data-00000-of-00001",
|
|
100
96
|
description="Tensorpack layout model for inference purposes trained on Publaynet",
|
|
@@ -105,11 +101,11 @@ class ModelCatalog:
|
|
|
105
101
|
hf_model_name="model-800000_inf_only",
|
|
106
102
|
hf_config_file=["conf_frcnn_layout.yaml"],
|
|
107
103
|
categories={
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
104
|
+
1: LayoutType.TEXT,
|
|
105
|
+
2: LayoutType.TITLE,
|
|
106
|
+
3: LayoutType.LIST,
|
|
107
|
+
4: LayoutType.TABLE,
|
|
108
|
+
5: LayoutType.FIGURE,
|
|
113
109
|
},
|
|
114
110
|
dl_library="TF",
|
|
115
111
|
model_wrapper="TPFrcnnDetector",
|
|
@@ -123,7 +119,7 @@ class ModelCatalog:
|
|
|
123
119
|
hf_repo_id="deepdoctection/tp_casc_rcnn_X_32xd4_50_FPN_GN_2FC_pubtabnet_c_inference_only",
|
|
124
120
|
hf_model_name="model-1800000_inf_only",
|
|
125
121
|
hf_config_file=["conf_frcnn_cell.yaml"],
|
|
126
|
-
categories={
|
|
122
|
+
categories={1: LayoutType.CELL},
|
|
127
123
|
dl_library="TF",
|
|
128
124
|
model_wrapper="TPFrcnnDetector",
|
|
129
125
|
),
|
|
@@ -136,7 +132,7 @@ class ModelCatalog:
|
|
|
136
132
|
hf_repo_id="deepdoctection/tp_casc_rcnn_X_32xd4_50_FPN_GN_2FC_pubtabnet_rc_inference_only",
|
|
137
133
|
hf_model_name="model-1620000_inf_only",
|
|
138
134
|
hf_config_file=["conf_frcnn_rows.yaml"],
|
|
139
|
-
categories={
|
|
135
|
+
categories={1: LayoutType.ROW, 2: LayoutType.COLUMN},
|
|
140
136
|
dl_library="TF",
|
|
141
137
|
model_wrapper="TPFrcnnDetector",
|
|
142
138
|
),
|
|
@@ -149,7 +145,7 @@ class ModelCatalog:
|
|
|
149
145
|
hf_repo_id="deepdoctection/tp_casc_rcnn_X_32xd4_50_FPN_GN_2FC_pubtabnet_rc",
|
|
150
146
|
hf_model_name="model-1620000",
|
|
151
147
|
hf_config_file=["conf_frcnn_rows.yaml"],
|
|
152
|
-
categories={
|
|
148
|
+
categories={1: LayoutType.ROW, 2: LayoutType.COLUMN},
|
|
153
149
|
dl_library="TF",
|
|
154
150
|
model_wrapper="TPFrcnnDetector",
|
|
155
151
|
),
|
|
@@ -164,11 +160,11 @@ class ModelCatalog:
|
|
|
164
160
|
hf_config_file=["conf_frcnn_layout.yaml"],
|
|
165
161
|
dl_library="TF",
|
|
166
162
|
categories={
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
163
|
+
1: LayoutType.TEXT,
|
|
164
|
+
2: LayoutType.TITLE,
|
|
165
|
+
3: LayoutType.LIST,
|
|
166
|
+
4: LayoutType.TABLE,
|
|
167
|
+
5: LayoutType.FIGURE,
|
|
172
168
|
},
|
|
173
169
|
model_wrapper="TPFrcnnDetector",
|
|
174
170
|
),
|
|
@@ -181,7 +177,7 @@ class ModelCatalog:
|
|
|
181
177
|
hf_repo_id="deepdoctection/tp_casc_rcnn_X_32xd4_50_FPN_GN_2FC_pubtabnet_c",
|
|
182
178
|
hf_model_name="model-1800000",
|
|
183
179
|
hf_config_file=["conf_frcnn_cell.yaml"],
|
|
184
|
-
categories={
|
|
180
|
+
categories={1: LayoutType.CELL},
|
|
185
181
|
dl_library="TF",
|
|
186
182
|
model_wrapper="TPFrcnnDetector",
|
|
187
183
|
),
|
|
@@ -195,11 +191,11 @@ class ModelCatalog:
|
|
|
195
191
|
hf_model_name="d2_model_0829999_layout_inf_only.pt",
|
|
196
192
|
hf_config_file=["Base-RCNN-FPN.yaml", "CASCADE_RCNN_R_50_FPN_GN.yaml"],
|
|
197
193
|
categories={
|
|
198
|
-
|
|
199
|
-
|
|
200
|
-
|
|
201
|
-
|
|
202
|
-
|
|
194
|
+
1: LayoutType.TEXT,
|
|
195
|
+
2: LayoutType.TITLE,
|
|
196
|
+
3: LayoutType.LIST,
|
|
197
|
+
4: LayoutType.TABLE,
|
|
198
|
+
5: LayoutType.FIGURE,
|
|
203
199
|
},
|
|
204
200
|
dl_library="PT",
|
|
205
201
|
model_wrapper="D2FrcnnDetector",
|
|
@@ -214,11 +210,11 @@ class ModelCatalog:
|
|
|
214
210
|
hf_model_name="d2_model_0829999_layout.pth",
|
|
215
211
|
hf_config_file=["Base-RCNN-FPN.yaml", "CASCADE_RCNN_R_50_FPN_GN.yaml"],
|
|
216
212
|
categories={
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
|
|
220
|
-
|
|
221
|
-
|
|
213
|
+
1: LayoutType.TEXT,
|
|
214
|
+
2: LayoutType.TITLE,
|
|
215
|
+
3: LayoutType.LIST,
|
|
216
|
+
4: LayoutType.TABLE,
|
|
217
|
+
5: LayoutType.FIGURE,
|
|
222
218
|
},
|
|
223
219
|
dl_library="PT",
|
|
224
220
|
model_wrapper="D2FrcnnDetector",
|
|
@@ -233,11 +229,11 @@ class ModelCatalog:
|
|
|
233
229
|
hf_model_name="d2_model_0829999_layout_inf_only.ts",
|
|
234
230
|
hf_config_file=["CASCADE_RCNN_R_50_FPN_GN_TS.yaml"],
|
|
235
231
|
categories={
|
|
236
|
-
|
|
237
|
-
|
|
238
|
-
|
|
239
|
-
|
|
240
|
-
|
|
232
|
+
1: LayoutType.TEXT,
|
|
233
|
+
2: LayoutType.TITLE,
|
|
234
|
+
3: LayoutType.LIST,
|
|
235
|
+
4: LayoutType.TABLE,
|
|
236
|
+
5: LayoutType.FIGURE,
|
|
241
237
|
},
|
|
242
238
|
dl_library="PT",
|
|
243
239
|
model_wrapper="D2FrcnnTracingDetector",
|
|
@@ -251,7 +247,7 @@ class ModelCatalog:
|
|
|
251
247
|
hf_repo_id="deepdoctection/d2_casc_rcnn_X_32xd4_50_FPN_GN_2FC_pubtabnet_c_inference_only",
|
|
252
248
|
hf_model_name="d2_model_1849999_cell_inf_only.pt",
|
|
253
249
|
hf_config_file=["Base-RCNN-FPN.yaml", "CASCADE_RCNN_R_50_FPN_GN.yaml"],
|
|
254
|
-
categories={
|
|
250
|
+
categories={1: LayoutType.CELL},
|
|
255
251
|
dl_library="PT",
|
|
256
252
|
model_wrapper="D2FrcnnDetector",
|
|
257
253
|
),
|
|
@@ -264,7 +260,7 @@ class ModelCatalog:
|
|
|
264
260
|
hf_repo_id="deepdoctection/d2_casc_rcnn_X_32xd4_50_FPN_GN_2FC_pubtabnet_c_inference_only",
|
|
265
261
|
hf_model_name="d2_model_1849999_cell_inf_only.ts",
|
|
266
262
|
hf_config_file=["CASCADE_RCNN_R_50_FPN_GN_TS.yaml"],
|
|
267
|
-
categories={
|
|
263
|
+
categories={1: LayoutType.CELL},
|
|
268
264
|
dl_library="PT",
|
|
269
265
|
model_wrapper="D2FrcnnTracingDetector",
|
|
270
266
|
),
|
|
@@ -277,7 +273,7 @@ class ModelCatalog:
|
|
|
277
273
|
hf_repo_id="deepdoctection/d2_casc_rcnn_X_32xd4_50_FPN_GN_2FC_pubtabnet_c_inference_only",
|
|
278
274
|
hf_model_name="cell/d2_model_1849999_cell.pth",
|
|
279
275
|
hf_config_file=["Base-RCNN-FPN.yaml", "CASCADE_RCNN_R_50_FPN_GN.yaml"],
|
|
280
|
-
categories={
|
|
276
|
+
categories={1: LayoutType.CELL},
|
|
281
277
|
dl_library="PT",
|
|
282
278
|
model_wrapper="D2FrcnnDetector",
|
|
283
279
|
),
|
|
@@ -290,7 +286,7 @@ class ModelCatalog:
|
|
|
290
286
|
hf_repo_id="deepdoctection/d2_casc_rcnn_X_32xd4_50_FPN_GN_2FC_pubtabnet_rc_inference_only",
|
|
291
287
|
hf_model_name="d2_model_1639999_item.pth",
|
|
292
288
|
hf_config_file=["Base-RCNN-FPN.yaml", "CASCADE_RCNN_R_50_FPN_GN.yaml"],
|
|
293
|
-
categories={
|
|
289
|
+
categories={1: LayoutType.ROW, 2: LayoutType.COLUMN},
|
|
294
290
|
dl_library="PT",
|
|
295
291
|
model_wrapper="D2FrcnnDetector",
|
|
296
292
|
),
|
|
@@ -303,7 +299,7 @@ class ModelCatalog:
|
|
|
303
299
|
hf_repo_id="deepdoctection/d2_casc_rcnn_X_32xd4_50_FPN_GN_2FC_pubtabnet_rc_inference_only",
|
|
304
300
|
hf_model_name="d2_model_1639999_item_inf_only.pt",
|
|
305
301
|
hf_config_file=["Base-RCNN-FPN.yaml", "CASCADE_RCNN_R_50_FPN_GN.yaml"],
|
|
306
|
-
categories={
|
|
302
|
+
categories={1: LayoutType.ROW, 2: LayoutType.COLUMN},
|
|
307
303
|
dl_library="PT",
|
|
308
304
|
model_wrapper="D2FrcnnDetector",
|
|
309
305
|
),
|
|
@@ -316,7 +312,7 @@ class ModelCatalog:
|
|
|
316
312
|
hf_repo_id="deepdoctection/d2_casc_rcnn_X_32xd4_50_FPN_GN_2FC_pubtabnet_rc_inference_only",
|
|
317
313
|
hf_model_name="d2_model_1639999_item_inf_only.ts",
|
|
318
314
|
hf_config_file=["CASCADE_RCNN_R_50_FPN_GN_TS.yaml"],
|
|
319
|
-
categories={
|
|
315
|
+
categories={1: LayoutType.ROW, 2: LayoutType.COLUMN},
|
|
320
316
|
dl_library="PT",
|
|
321
317
|
model_wrapper="D2FrcnnTracingDetector",
|
|
322
318
|
),
|
|
@@ -453,7 +449,7 @@ class ModelCatalog:
|
|
|
453
449
|
hf_repo_id="microsoft/table-transformer-detection",
|
|
454
450
|
hf_model_name="pytorch_model.bin",
|
|
455
451
|
hf_config_file=["config.json", "preprocessor_config.json"],
|
|
456
|
-
categories={
|
|
452
|
+
categories={1: LayoutType.TABLE, 2: LayoutType.TABLE_ROTATED},
|
|
457
453
|
dl_library="PT",
|
|
458
454
|
model_wrapper="HFDetrDerivedDetector",
|
|
459
455
|
),
|
|
@@ -471,12 +467,12 @@ class ModelCatalog:
|
|
|
471
467
|
hf_model_name="pytorch_model.bin",
|
|
472
468
|
hf_config_file=["config.json", "preprocessor_config.json"],
|
|
473
469
|
categories={
|
|
474
|
-
|
|
475
|
-
|
|
476
|
-
|
|
477
|
-
|
|
478
|
-
|
|
479
|
-
|
|
470
|
+
1: LayoutType.TABLE,
|
|
471
|
+
2: LayoutType.COLUMN,
|
|
472
|
+
3: LayoutType.ROW,
|
|
473
|
+
4: CellType.COLUMN_HEADER,
|
|
474
|
+
5: CellType.PROJECTED_ROW_HEADER,
|
|
475
|
+
6: CellType.SPANNING,
|
|
480
476
|
},
|
|
481
477
|
dl_library="PT",
|
|
482
478
|
model_wrapper="HFDetrDerivedDetector",
|
|
@@ -488,7 +484,7 @@ class ModelCatalog:
|
|
|
488
484
|
"https://mindee.github.io/doctr/using_doctr/using_models.html#. This is the Pytorch artefact.",
|
|
489
485
|
size=[101971449],
|
|
490
486
|
urls=["https://doctr-static.mindee.com/models?id=v0.3.1/db_resnet50-ac60cadc.pt&src=0"],
|
|
491
|
-
categories={
|
|
487
|
+
categories={1: LayoutType.WORD},
|
|
492
488
|
dl_library="PT",
|
|
493
489
|
model_wrapper="DoctrTextlineDetector",
|
|
494
490
|
architecture="db_resnet50",
|
|
@@ -500,7 +496,7 @@ class ModelCatalog:
|
|
|
500
496
|
"https://mindee.github.io/doctr/using_doctr/using_models.html#. This is the Tensorflow artefact.",
|
|
501
497
|
size=[94178964],
|
|
502
498
|
urls=["https://doctr-static.mindee.com/models?id=v0.2.0/db_resnet50-adcafc63.zip&src=0"],
|
|
503
|
-
categories={
|
|
499
|
+
categories={1: LayoutType.WORD},
|
|
504
500
|
dl_library="TF",
|
|
505
501
|
model_wrapper="DoctrTextlineDetector",
|
|
506
502
|
architecture="db_resnet50",
|
|
@@ -548,189 +544,367 @@ class ModelCatalog:
|
|
|
548
544
|
size=[131266198],
|
|
549
545
|
urls=["https://dl.fbaipublicfiles.com/fasttext/supervised-models/lid.176.bin"],
|
|
550
546
|
categories={
|
|
551
|
-
|
|
552
|
-
|
|
553
|
-
|
|
554
|
-
|
|
555
|
-
|
|
556
|
-
|
|
557
|
-
|
|
558
|
-
|
|
559
|
-
|
|
560
|
-
|
|
561
|
-
|
|
562
|
-
|
|
563
|
-
|
|
564
|
-
|
|
565
|
-
|
|
566
|
-
|
|
567
|
-
|
|
568
|
-
|
|
569
|
-
|
|
570
|
-
|
|
571
|
-
|
|
572
|
-
|
|
573
|
-
|
|
574
|
-
|
|
575
|
-
|
|
576
|
-
|
|
577
|
-
|
|
578
|
-
|
|
579
|
-
|
|
580
|
-
|
|
581
|
-
|
|
582
|
-
|
|
583
|
-
|
|
584
|
-
|
|
585
|
-
|
|
586
|
-
|
|
587
|
-
|
|
588
|
-
|
|
589
|
-
|
|
590
|
-
|
|
591
|
-
|
|
592
|
-
|
|
593
|
-
|
|
594
|
-
|
|
595
|
-
|
|
596
|
-
|
|
597
|
-
|
|
598
|
-
|
|
599
|
-
|
|
600
|
-
|
|
601
|
-
|
|
602
|
-
|
|
603
|
-
|
|
604
|
-
|
|
605
|
-
|
|
606
|
-
|
|
607
|
-
|
|
608
|
-
|
|
609
|
-
|
|
610
|
-
|
|
611
|
-
|
|
612
|
-
|
|
613
|
-
|
|
614
|
-
|
|
615
|
-
|
|
616
|
-
|
|
617
|
-
|
|
618
|
-
|
|
619
|
-
|
|
620
|
-
|
|
621
|
-
|
|
622
|
-
|
|
623
|
-
|
|
624
|
-
|
|
625
|
-
|
|
626
|
-
|
|
627
|
-
|
|
628
|
-
|
|
629
|
-
|
|
630
|
-
|
|
631
|
-
|
|
632
|
-
|
|
633
|
-
|
|
634
|
-
|
|
635
|
-
|
|
636
|
-
|
|
637
|
-
|
|
638
|
-
|
|
639
|
-
|
|
640
|
-
|
|
641
|
-
|
|
642
|
-
|
|
643
|
-
|
|
644
|
-
|
|
645
|
-
|
|
646
|
-
|
|
647
|
-
|
|
648
|
-
|
|
649
|
-
|
|
650
|
-
|
|
651
|
-
|
|
652
|
-
|
|
653
|
-
|
|
654
|
-
|
|
655
|
-
|
|
656
|
-
|
|
657
|
-
|
|
658
|
-
|
|
659
|
-
|
|
660
|
-
|
|
661
|
-
|
|
662
|
-
|
|
663
|
-
|
|
664
|
-
|
|
665
|
-
|
|
666
|
-
|
|
667
|
-
|
|
668
|
-
|
|
669
|
-
|
|
670
|
-
|
|
671
|
-
|
|
672
|
-
|
|
673
|
-
|
|
674
|
-
|
|
675
|
-
|
|
676
|
-
|
|
677
|
-
|
|
678
|
-
|
|
679
|
-
|
|
680
|
-
|
|
681
|
-
|
|
682
|
-
|
|
683
|
-
|
|
684
|
-
|
|
685
|
-
|
|
686
|
-
|
|
687
|
-
|
|
688
|
-
|
|
689
|
-
|
|
690
|
-
|
|
691
|
-
|
|
692
|
-
|
|
693
|
-
|
|
694
|
-
|
|
695
|
-
|
|
696
|
-
|
|
697
|
-
|
|
698
|
-
|
|
699
|
-
|
|
700
|
-
|
|
701
|
-
|
|
702
|
-
|
|
703
|
-
|
|
704
|
-
|
|
705
|
-
|
|
706
|
-
|
|
707
|
-
|
|
708
|
-
|
|
709
|
-
|
|
710
|
-
|
|
711
|
-
|
|
712
|
-
|
|
713
|
-
|
|
714
|
-
|
|
715
|
-
|
|
716
|
-
|
|
717
|
-
|
|
718
|
-
|
|
719
|
-
|
|
720
|
-
|
|
721
|
-
|
|
722
|
-
|
|
723
|
-
|
|
724
|
-
|
|
725
|
-
|
|
726
|
-
|
|
547
|
+
1: Languages.ENGLISH,
|
|
548
|
+
2: Languages.RUSSIAN,
|
|
549
|
+
3: Languages.GERMAN,
|
|
550
|
+
4: Languages.FRENCH,
|
|
551
|
+
5: Languages.ITALIAN,
|
|
552
|
+
6: Languages.JAPANESE,
|
|
553
|
+
7: Languages.SPANISH,
|
|
554
|
+
8: Languages.CEBUANO,
|
|
555
|
+
9: Languages.TURKISH,
|
|
556
|
+
10: Languages.PORTUGUESE,
|
|
557
|
+
11: Languages.UKRAINIAN,
|
|
558
|
+
12: Languages.ESPERANTO,
|
|
559
|
+
13: Languages.POLISH,
|
|
560
|
+
14: Languages.SWEDISH,
|
|
561
|
+
15: Languages.DUTCH,
|
|
562
|
+
16: Languages.HEBREW,
|
|
563
|
+
17: Languages.CHINESE,
|
|
564
|
+
18: Languages.HUNGARIAN,
|
|
565
|
+
19: Languages.ARABIC,
|
|
566
|
+
20: Languages.CATALAN,
|
|
567
|
+
21: Languages.FINNISH,
|
|
568
|
+
22: Languages.CZECH,
|
|
569
|
+
23: Languages.PERSIAN,
|
|
570
|
+
24: Languages.SERBIAN,
|
|
571
|
+
25: Languages.GREEK,
|
|
572
|
+
26: Languages.VIETNAMESE,
|
|
573
|
+
27: Languages.BULGARIAN,
|
|
574
|
+
28: Languages.KOREAN,
|
|
575
|
+
29: Languages.NORWEGIAN,
|
|
576
|
+
30: Languages.MACEDONIAN,
|
|
577
|
+
31: Languages.ROMANIAN,
|
|
578
|
+
32: Languages.INDONESIAN,
|
|
579
|
+
33: Languages.THAI,
|
|
580
|
+
34: Languages.ARMENIAN,
|
|
581
|
+
35: Languages.DANISH,
|
|
582
|
+
36: Languages.TAMIL,
|
|
583
|
+
37: Languages.HINDI,
|
|
584
|
+
38: Languages.CROATIAN,
|
|
585
|
+
39: Languages.NOT_DEFINED,
|
|
586
|
+
40: Languages.BELARUSIAN,
|
|
587
|
+
41: Languages.GEORGIAN,
|
|
588
|
+
42: Languages.TELUGU,
|
|
589
|
+
43: Languages.KAZAKH,
|
|
590
|
+
44: Languages.WARAY,
|
|
591
|
+
45: Languages.LITHUANIAN,
|
|
592
|
+
46: Languages.SCOTTISH,
|
|
593
|
+
47: Languages.SLOVAK,
|
|
594
|
+
48: Languages.BENIN,
|
|
595
|
+
49: Languages.BASQUE,
|
|
596
|
+
50: Languages.SLOVENIAN,
|
|
597
|
+
51: Languages.NOT_DEFINED,
|
|
598
|
+
52: Languages.MALAYALAM,
|
|
599
|
+
53: Languages.MARATHI,
|
|
600
|
+
54: Languages.ESTONIAN,
|
|
601
|
+
55: Languages.AZERBAIJANI,
|
|
602
|
+
56: Languages.NOT_DEFINED,
|
|
603
|
+
57: Languages.ALBANIAN,
|
|
604
|
+
58: Languages.LATIN,
|
|
605
|
+
59: Languages.BOSNIAN,
|
|
606
|
+
60: Languages.NORWEGIAN_NOVOSIBIRSK,
|
|
607
|
+
61: Languages.URDU,
|
|
608
|
+
62: Languages.NOT_DEFINED,
|
|
609
|
+
63: Languages.NOT_DEFINED,
|
|
610
|
+
64: Languages.NOT_DEFINED,
|
|
611
|
+
65: Languages.NOT_DEFINED,
|
|
612
|
+
66: Languages.NOT_DEFINED,
|
|
613
|
+
67: Languages.NOT_DEFINED,
|
|
614
|
+
68: Languages.NOT_DEFINED,
|
|
615
|
+
69: Languages.NOT_DEFINED,
|
|
616
|
+
70: Languages.NOT_DEFINED,
|
|
617
|
+
71: Languages.NOT_DEFINED,
|
|
618
|
+
72: Languages.NOT_DEFINED,
|
|
619
|
+
73: Languages.NOT_DEFINED,
|
|
620
|
+
74: Languages.NOT_DEFINED,
|
|
621
|
+
75: Languages.NOT_DEFINED,
|
|
622
|
+
76: Languages.NOT_DEFINED,
|
|
623
|
+
77: Languages.NOT_DEFINED,
|
|
624
|
+
78: Languages.NOT_DEFINED,
|
|
625
|
+
79: Languages.NOT_DEFINED,
|
|
626
|
+
80: Languages.NOT_DEFINED,
|
|
627
|
+
81: Languages.NOT_DEFINED,
|
|
628
|
+
82: Languages.NOT_DEFINED,
|
|
629
|
+
83: Languages.NOT_DEFINED,
|
|
630
|
+
84: Languages.NOT_DEFINED,
|
|
631
|
+
85: Languages.NOT_DEFINED,
|
|
632
|
+
86: Languages.NOT_DEFINED,
|
|
633
|
+
87: Languages.NOT_DEFINED,
|
|
634
|
+
88: Languages.NOT_DEFINED,
|
|
635
|
+
89: Languages.NOT_DEFINED,
|
|
636
|
+
90: Languages.NOT_DEFINED,
|
|
637
|
+
91: Languages.NOT_DEFINED,
|
|
638
|
+
92: Languages.NOT_DEFINED,
|
|
639
|
+
93: Languages.NOT_DEFINED,
|
|
640
|
+
94: Languages.NOT_DEFINED,
|
|
641
|
+
95: Languages.NOT_DEFINED,
|
|
642
|
+
96: Languages.NOT_DEFINED,
|
|
643
|
+
97: Languages.NOT_DEFINED,
|
|
644
|
+
98: Languages.NOT_DEFINED,
|
|
645
|
+
99: Languages.NOT_DEFINED,
|
|
646
|
+
100: Languages.NOT_DEFINED,
|
|
647
|
+
101: Languages.NOT_DEFINED,
|
|
648
|
+
102: Languages.NOT_DEFINED,
|
|
649
|
+
103: Languages.NOT_DEFINED,
|
|
650
|
+
104: Languages.NOT_DEFINED,
|
|
651
|
+
105: Languages.NOT_DEFINED,
|
|
652
|
+
106: Languages.NOT_DEFINED,
|
|
653
|
+
107: Languages.NOT_DEFINED,
|
|
654
|
+
108: Languages.NOT_DEFINED,
|
|
655
|
+
109: Languages.NOT_DEFINED,
|
|
656
|
+
110: Languages.NOT_DEFINED,
|
|
657
|
+
111: Languages.NOT_DEFINED,
|
|
658
|
+
112: Languages.NOT_DEFINED,
|
|
659
|
+
113: Languages.NOT_DEFINED,
|
|
660
|
+
114: Languages.NOT_DEFINED,
|
|
661
|
+
115: Languages.NOT_DEFINED,
|
|
662
|
+
116: Languages.NOT_DEFINED,
|
|
663
|
+
117: Languages.NOT_DEFINED,
|
|
664
|
+
118: Languages.NOT_DEFINED,
|
|
665
|
+
119: Languages.NOT_DEFINED,
|
|
666
|
+
120: Languages.NOT_DEFINED,
|
|
667
|
+
121: Languages.NOT_DEFINED,
|
|
668
|
+
122: Languages.NOT_DEFINED,
|
|
669
|
+
123: Languages.NOT_DEFINED,
|
|
670
|
+
124: Languages.NOT_DEFINED,
|
|
671
|
+
125: Languages.NOT_DEFINED,
|
|
672
|
+
126: Languages.NOT_DEFINED,
|
|
673
|
+
127: Languages.NOT_DEFINED,
|
|
674
|
+
128: Languages.NOT_DEFINED,
|
|
675
|
+
129: Languages.NOT_DEFINED,
|
|
676
|
+
130: Languages.NOT_DEFINED,
|
|
677
|
+
131: Languages.NOT_DEFINED,
|
|
678
|
+
132: Languages.NOT_DEFINED,
|
|
679
|
+
133: Languages.NOT_DEFINED,
|
|
680
|
+
134: Languages.NOT_DEFINED,
|
|
681
|
+
135: Languages.NOT_DEFINED,
|
|
682
|
+
136: Languages.NOT_DEFINED,
|
|
683
|
+
137: Languages.NOT_DEFINED,
|
|
684
|
+
138: Languages.NOT_DEFINED,
|
|
685
|
+
139: Languages.NOT_DEFINED,
|
|
686
|
+
140: Languages.NOT_DEFINED,
|
|
687
|
+
141: Languages.NOT_DEFINED,
|
|
688
|
+
142: Languages.NOT_DEFINED,
|
|
689
|
+
143: Languages.NOT_DEFINED,
|
|
690
|
+
144: Languages.NOT_DEFINED,
|
|
691
|
+
145: Languages.NOT_DEFINED,
|
|
692
|
+
146: Languages.NOT_DEFINED,
|
|
693
|
+
147: Languages.NOT_DEFINED,
|
|
694
|
+
148: Languages.NOT_DEFINED,
|
|
695
|
+
149: Languages.NOT_DEFINED,
|
|
696
|
+
150: Languages.NOT_DEFINED,
|
|
697
|
+
151: Languages.NOT_DEFINED,
|
|
698
|
+
152: Languages.NOT_DEFINED,
|
|
699
|
+
153: Languages.NOT_DEFINED,
|
|
700
|
+
154: Languages.NOT_DEFINED,
|
|
701
|
+
155: Languages.NOT_DEFINED,
|
|
702
|
+
156: Languages.NOT_DEFINED,
|
|
703
|
+
157: Languages.NOT_DEFINED,
|
|
704
|
+
158: Languages.NOT_DEFINED,
|
|
705
|
+
159: Languages.NOT_DEFINED,
|
|
706
|
+
160: Languages.NOT_DEFINED,
|
|
707
|
+
161: Languages.NOT_DEFINED,
|
|
708
|
+
162: Languages.NOT_DEFINED,
|
|
709
|
+
163: Languages.NOT_DEFINED,
|
|
710
|
+
164: Languages.NOT_DEFINED,
|
|
711
|
+
165: Languages.NOT_DEFINED,
|
|
712
|
+
166: Languages.NOT_DEFINED,
|
|
713
|
+
167: Languages.NOT_DEFINED,
|
|
714
|
+
168: Languages.NOT_DEFINED,
|
|
715
|
+
169: Languages.NOT_DEFINED,
|
|
716
|
+
170: Languages.NOT_DEFINED,
|
|
717
|
+
171: Languages.NOT_DEFINED,
|
|
718
|
+
172: Languages.NOT_DEFINED,
|
|
719
|
+
173: Languages.NOT_DEFINED,
|
|
720
|
+
174: Languages.NOT_DEFINED,
|
|
721
|
+
175: Languages.NOT_DEFINED,
|
|
722
|
+
176: Languages.NOT_DEFINED,
|
|
723
|
+
},
|
|
724
|
+
categories_orig={
|
|
725
|
+
"__label__en": Languages.ENGLISH,
|
|
726
|
+
"__label__ru": Languages.RUSSIAN,
|
|
727
|
+
"__label__de": Languages.GERMAN,
|
|
728
|
+
"__label__fr": Languages.FRENCH,
|
|
729
|
+
"__label__it": Languages.ITALIAN,
|
|
730
|
+
"__label__ja": Languages.JAPANESE,
|
|
731
|
+
"__label__es": Languages.SPANISH,
|
|
732
|
+
"__label__ceb": Languages.CEBUANO,
|
|
733
|
+
"__label__tr": Languages.TURKISH,
|
|
734
|
+
"__label__pt": Languages.PORTUGUESE,
|
|
735
|
+
"__label__uk": Languages.UKRAINIAN,
|
|
736
|
+
"__label__eo": Languages.ESPERANTO,
|
|
737
|
+
"__label__pl": Languages.POLISH,
|
|
738
|
+
"__label__sv": Languages.SWEDISH,
|
|
739
|
+
"__label__nl": Languages.DUTCH,
|
|
740
|
+
"__label__he": Languages.HEBREW,
|
|
741
|
+
"__label__zh": Languages.CHINESE,
|
|
742
|
+
"__label__hu": Languages.HUNGARIAN,
|
|
743
|
+
"__label__ar": Languages.ARABIC,
|
|
744
|
+
"__label__ca": Languages.CATALAN,
|
|
745
|
+
"__label__fi": Languages.FINNISH,
|
|
746
|
+
"__label__cs": Languages.CZECH,
|
|
747
|
+
"__label__fa": Languages.PERSIAN,
|
|
748
|
+
"__label__sr": Languages.SERBIAN,
|
|
749
|
+
"__label__el": Languages.GREEK,
|
|
750
|
+
"__label__vi": Languages.VIETNAMESE,
|
|
751
|
+
"__label__bg": Languages.BULGARIAN,
|
|
752
|
+
"__label__ko": Languages.KOREAN,
|
|
753
|
+
"__label__no": Languages.NORWEGIAN,
|
|
754
|
+
"__label__mk": Languages.MACEDONIAN,
|
|
755
|
+
"__label__ro": Languages.ROMANIAN,
|
|
756
|
+
"__label__id": Languages.INDONESIAN,
|
|
757
|
+
"__label__th": Languages.THAI,
|
|
758
|
+
"__label__hy": Languages.ARMENIAN,
|
|
759
|
+
"__label__da": Languages.DANISH,
|
|
760
|
+
"__label__ta": Languages.TAMIL,
|
|
761
|
+
"__label__hi": Languages.HINDI,
|
|
762
|
+
"__label__hr": Languages.CROATIAN,
|
|
763
|
+
"__label__sh": Languages.NOT_DEFINED,
|
|
764
|
+
"__label__be": Languages.BELARUSIAN,
|
|
765
|
+
"__label__ka": Languages.GEORGIAN,
|
|
766
|
+
"__label__te": Languages.TELUGU,
|
|
767
|
+
"__label__kk": Languages.KAZAKH,
|
|
768
|
+
"__label__war": Languages.WARAY,
|
|
769
|
+
"__label__lt": Languages.LITHUANIAN,
|
|
770
|
+
"__label__gl": Languages.SCOTTISH,
|
|
771
|
+
"__label__sk": Languages.SLOVAK,
|
|
772
|
+
"__label__bn": Languages.BENIN,
|
|
773
|
+
"__label__eu": Languages.BASQUE,
|
|
774
|
+
"__label__sl": Languages.SLOVENIAN,
|
|
775
|
+
"__label__kn": Languages.NOT_DEFINED,
|
|
776
|
+
"__label__ml": Languages.MALAYALAM,
|
|
777
|
+
"__label__mr": Languages.MARATHI,
|
|
778
|
+
"__label__et": Languages.ESTONIAN,
|
|
779
|
+
"__label__az": Languages.AZERBAIJANI,
|
|
780
|
+
"__label__ms": Languages.NOT_DEFINED,
|
|
781
|
+
"__label__sq": Languages.ALBANIAN,
|
|
782
|
+
"__label__la": Languages.LATIN,
|
|
783
|
+
"__label__bs": Languages.BOSNIAN,
|
|
784
|
+
"__label__nn": Languages.NORWEGIAN_NOVOSIBIRSK,
|
|
785
|
+
"__label__ur": Languages.URDU,
|
|
786
|
+
"__label__lv": Languages.NOT_DEFINED,
|
|
787
|
+
"__label__my": Languages.NOT_DEFINED,
|
|
788
|
+
"__label__tt": Languages.NOT_DEFINED,
|
|
789
|
+
"__label__af": Languages.NOT_DEFINED,
|
|
790
|
+
"__label__oc": Languages.NOT_DEFINED,
|
|
791
|
+
"__label__nds": Languages.NOT_DEFINED,
|
|
792
|
+
"__label__ky": Languages.NOT_DEFINED,
|
|
793
|
+
"__label__ast": Languages.NOT_DEFINED,
|
|
794
|
+
"__label__tl": Languages.NOT_DEFINED,
|
|
795
|
+
"__label__is": Languages.NOT_DEFINED,
|
|
796
|
+
"__label__ia": Languages.NOT_DEFINED,
|
|
797
|
+
"__label__si": Languages.NOT_DEFINED,
|
|
798
|
+
"__label__gu": Languages.NOT_DEFINED,
|
|
799
|
+
"__label__km": Languages.NOT_DEFINED,
|
|
800
|
+
"__label__br": Languages.NOT_DEFINED,
|
|
801
|
+
"__label__ba": Languages.NOT_DEFINED,
|
|
802
|
+
"__label__uz": Languages.NOT_DEFINED,
|
|
803
|
+
"__label__bo": Languages.NOT_DEFINED,
|
|
804
|
+
"__label__pa": Languages.NOT_DEFINED,
|
|
805
|
+
"__label__vo": Languages.NOT_DEFINED,
|
|
806
|
+
"__label__als": Languages.NOT_DEFINED,
|
|
807
|
+
"__label__ne": Languages.NOT_DEFINED,
|
|
808
|
+
"__label__cy": Languages.NOT_DEFINED,
|
|
809
|
+
"__label__jbo": Languages.NOT_DEFINED,
|
|
810
|
+
"__label__fy": Languages.NOT_DEFINED,
|
|
811
|
+
"__label__mn": Languages.NOT_DEFINED,
|
|
812
|
+
"__label__lb": Languages.NOT_DEFINED,
|
|
813
|
+
"__label__ce": Languages.NOT_DEFINED,
|
|
814
|
+
"__label__ug": Languages.NOT_DEFINED,
|
|
815
|
+
"__label__tg": Languages.NOT_DEFINED,
|
|
816
|
+
"__label__sco": Languages.NOT_DEFINED,
|
|
817
|
+
"__label__sa": Languages.NOT_DEFINED,
|
|
818
|
+
"__label__cv": Languages.NOT_DEFINED,
|
|
819
|
+
"__label__jv": Languages.NOT_DEFINED,
|
|
820
|
+
"__label__min": Languages.NOT_DEFINED,
|
|
821
|
+
"__label__io": Languages.NOT_DEFINED,
|
|
822
|
+
"__label__or": Languages.NOT_DEFINED,
|
|
823
|
+
"__label__as": Languages.NOT_DEFINED,
|
|
824
|
+
"__label__new": Languages.NOT_DEFINED,
|
|
825
|
+
"__label__ga": Languages.NOT_DEFINED,
|
|
826
|
+
"__label__mg": Languages.NOT_DEFINED,
|
|
827
|
+
"__label__an": Languages.NOT_DEFINED,
|
|
828
|
+
"__label__ckb": Languages.NOT_DEFINED,
|
|
829
|
+
"__label__sw": Languages.NOT_DEFINED,
|
|
830
|
+
"__label__bar": Languages.NOT_DEFINED,
|
|
831
|
+
"__label__lmo": Languages.NOT_DEFINED,
|
|
832
|
+
"__label__yi": Languages.NOT_DEFINED,
|
|
833
|
+
"__label__arz": Languages.NOT_DEFINED,
|
|
834
|
+
"__label__mhr": Languages.NOT_DEFINED,
|
|
835
|
+
"__label__azb": Languages.NOT_DEFINED,
|
|
836
|
+
"__label__sah": Languages.NOT_DEFINED,
|
|
837
|
+
"__label__pnb": Languages.NOT_DEFINED,
|
|
838
|
+
"__label__su": Languages.NOT_DEFINED,
|
|
839
|
+
"__label__bpy": Languages.NOT_DEFINED,
|
|
840
|
+
"__label__pms": Languages.NOT_DEFINED,
|
|
841
|
+
"__label__ilo": Languages.NOT_DEFINED,
|
|
842
|
+
"__label__wuu": Languages.NOT_DEFINED,
|
|
843
|
+
"__label__ku": Languages.NOT_DEFINED,
|
|
844
|
+
"__label__ps": Languages.NOT_DEFINED,
|
|
845
|
+
"__label__ie": Languages.NOT_DEFINED,
|
|
846
|
+
"__label__xmf": Languages.NOT_DEFINED,
|
|
847
|
+
"__label__yue": Languages.NOT_DEFINED,
|
|
848
|
+
"__label__gom": Languages.NOT_DEFINED,
|
|
849
|
+
"__label__li": Languages.NOT_DEFINED,
|
|
850
|
+
"__label__mwl": Languages.NOT_DEFINED,
|
|
851
|
+
"__label__kw": Languages.NOT_DEFINED,
|
|
852
|
+
"__label__sd": Languages.NOT_DEFINED,
|
|
853
|
+
"__label__hsb": Languages.NOT_DEFINED,
|
|
854
|
+
"__label__scn": Languages.NOT_DEFINED,
|
|
855
|
+
"__label__gd": Languages.NOT_DEFINED,
|
|
856
|
+
"__label__pam": Languages.NOT_DEFINED,
|
|
857
|
+
"__label__bh": Languages.NOT_DEFINED,
|
|
858
|
+
"__label__mai": Languages.NOT_DEFINED,
|
|
859
|
+
"__label__vec": Languages.NOT_DEFINED,
|
|
860
|
+
"__label__mt": Languages.NOT_DEFINED,
|
|
861
|
+
"__label__dv": Languages.NOT_DEFINED,
|
|
862
|
+
"__label__wa": Languages.NOT_DEFINED,
|
|
863
|
+
"__label__mzn": Languages.NOT_DEFINED,
|
|
864
|
+
"__label__am": Languages.NOT_DEFINED,
|
|
865
|
+
"__label__qu": Languages.NOT_DEFINED,
|
|
866
|
+
"__label__eml": Languages.NOT_DEFINED,
|
|
867
|
+
"__label__cbk": Languages.NOT_DEFINED,
|
|
868
|
+
"__label__tk": Languages.NOT_DEFINED,
|
|
869
|
+
"__label__rm": Languages.NOT_DEFINED,
|
|
870
|
+
"__label__os": Languages.NOT_DEFINED,
|
|
871
|
+
"__label__vls": Languages.NOT_DEFINED,
|
|
872
|
+
"__label__yo": Languages.NOT_DEFINED,
|
|
873
|
+
"__label__lo": Languages.NOT_DEFINED,
|
|
874
|
+
"__label__lez": Languages.NOT_DEFINED,
|
|
875
|
+
"__label__so": Languages.NOT_DEFINED,
|
|
876
|
+
"__label__myv": Languages.NOT_DEFINED,
|
|
877
|
+
"__label__diq": Languages.NOT_DEFINED,
|
|
878
|
+
"__label__mrj": Languages.NOT_DEFINED,
|
|
879
|
+
"__label__dsb": Languages.NOT_DEFINED,
|
|
880
|
+
"__label__frr": Languages.NOT_DEFINED,
|
|
881
|
+
"__label__ht": Languages.NOT_DEFINED,
|
|
882
|
+
"__label__gn": Languages.NOT_DEFINED,
|
|
883
|
+
"__label__bxr": Languages.NOT_DEFINED,
|
|
884
|
+
"__label__kv": Languages.NOT_DEFINED,
|
|
885
|
+
"__label__sc": Languages.NOT_DEFINED,
|
|
886
|
+
"__label__nah": Languages.NOT_DEFINED,
|
|
887
|
+
"__label__krc": Languages.NOT_DEFINED,
|
|
888
|
+
"__label__bcl": Languages.NOT_DEFINED,
|
|
889
|
+
"__label__nap": Languages.NOT_DEFINED,
|
|
890
|
+
"__label__gv": Languages.NOT_DEFINED,
|
|
891
|
+
"__label__av": Languages.NOT_DEFINED,
|
|
892
|
+
"__label__rue": Languages.NOT_DEFINED,
|
|
893
|
+
"__label__xal": Languages.NOT_DEFINED,
|
|
894
|
+
"__label__pfl": Languages.NOT_DEFINED,
|
|
895
|
+
"__label__dty": Languages.NOT_DEFINED,
|
|
896
|
+
"__label__hif": Languages.NOT_DEFINED,
|
|
897
|
+
"__label__co": Languages.NOT_DEFINED,
|
|
898
|
+
"__label__lrc": Languages.NOT_DEFINED,
|
|
899
|
+
"__label__vep": Languages.NOT_DEFINED,
|
|
900
|
+
"__label__tyv": Languages.NOT_DEFINED,
|
|
727
901
|
},
|
|
728
902
|
model_wrapper="FasttextLangDetector",
|
|
729
903
|
),
|
|
730
904
|
}
|
|
731
905
|
|
|
732
906
|
@staticmethod
|
|
733
|
-
def get_full_path_weights(name:
|
|
907
|
+
def get_full_path_weights(name: PathLikeOrStr) -> PathLikeOrStr:
|
|
734
908
|
"""
|
|
735
909
|
Returns the absolute path of weights.
|
|
736
910
|
|
|
@@ -741,7 +915,7 @@ class ModelCatalog:
|
|
|
741
915
|
:return: absolute weight path
|
|
742
916
|
"""
|
|
743
917
|
try:
|
|
744
|
-
profile = ModelCatalog.get_profile(name)
|
|
918
|
+
profile = ModelCatalog.get_profile(os.fspath(name))
|
|
745
919
|
except KeyError:
|
|
746
920
|
logger.info(
|
|
747
921
|
LoggingRecord(
|
|
@@ -761,7 +935,7 @@ class ModelCatalog:
|
|
|
761
935
|
return os.path.join(get_weights_dir_path(), name)
|
|
762
936
|
|
|
763
937
|
@staticmethod
|
|
764
|
-
def get_full_path_configs(name:
|
|
938
|
+
def get_full_path_configs(name: PathLikeOrStr) -> PathLikeOrStr:
|
|
765
939
|
"""
|
|
766
940
|
Return the absolute path of configs for some given weights. Alternatively, pass last a path to a config file
|
|
767
941
|
(without the base path to the cache config directory).
|
|
@@ -773,7 +947,7 @@ class ModelCatalog:
|
|
|
773
947
|
:return: absolute path to the config
|
|
774
948
|
"""
|
|
775
949
|
try:
|
|
776
|
-
profile = ModelCatalog.get_profile(name)
|
|
950
|
+
profile = ModelCatalog.get_profile(os.fspath(name))
|
|
777
951
|
except KeyError:
|
|
778
952
|
logger.info(
|
|
779
953
|
LoggingRecord(
|
|
@@ -787,7 +961,7 @@ class ModelCatalog:
|
|
|
787
961
|
return os.path.join(get_configs_dir_path(), name)
|
|
788
962
|
|
|
789
963
|
@staticmethod
|
|
790
|
-
def get_full_path_preprocessor_configs(name: str) ->
|
|
964
|
+
def get_full_path_preprocessor_configs(name: Union[str]) -> PathLikeOrStr:
|
|
791
965
|
"""
|
|
792
966
|
Return the absolute path of preprocessor configs for some given weights. Preprocessor are occasionally provided
|
|
793
967
|
by the transformer library.
|
|
@@ -811,21 +985,21 @@ class ModelCatalog:
|
|
|
811
985
|
return os.path.join(get_configs_dir_path(), name)
|
|
812
986
|
|
|
813
987
|
@staticmethod
|
|
814
|
-
def get_model_list() ->
|
|
988
|
+
def get_model_list() -> list[PathLikeOrStr]:
|
|
815
989
|
"""
|
|
816
990
|
Returns a list of absolute paths of registered models.
|
|
817
991
|
"""
|
|
818
992
|
return [os.path.join(get_weights_dir_path(), profile.name) for profile in ModelCatalog.CATALOG.values()]
|
|
819
993
|
|
|
820
994
|
@staticmethod
|
|
821
|
-
def get_profile_list() ->
|
|
995
|
+
def get_profile_list() -> list[str]:
|
|
822
996
|
"""
|
|
823
997
|
Returns a list profile keys.
|
|
824
998
|
"""
|
|
825
999
|
return list(ModelCatalog.CATALOG.keys())
|
|
826
1000
|
|
|
827
1001
|
@staticmethod
|
|
828
|
-
def is_registered(path_weights:
|
|
1002
|
+
def is_registered(path_weights: PathLikeOrStr) -> bool:
|
|
829
1003
|
"""
|
|
830
1004
|
Checks if some weights belong to a registered model
|
|
831
1005
|
|
|
@@ -849,8 +1023,8 @@ class ModelCatalog:
|
|
|
849
1023
|
|
|
850
1024
|
profile = ModelCatalog.CATALOG.get(name)
|
|
851
1025
|
if profile is not None:
|
|
852
|
-
return
|
|
853
|
-
raise KeyError("Model Profile does not exist. Please make sure the model is registered")
|
|
1026
|
+
return profile
|
|
1027
|
+
raise KeyError(f"Model Profile {name} does not exist. Please make sure the model is registered")
|
|
854
1028
|
|
|
855
1029
|
@staticmethod
|
|
856
1030
|
def register(name: str, profile: ModelProfile) -> None:
|
|
@@ -866,7 +1040,7 @@ class ModelCatalog:
|
|
|
866
1040
|
ModelCatalog.CATALOG[name] = profile
|
|
867
1041
|
|
|
868
1042
|
@staticmethod
|
|
869
|
-
def load_profiles_from_file(path: Optional[
|
|
1043
|
+
def load_profiles_from_file(path: Optional[PathLikeOrStr] = None) -> None:
|
|
870
1044
|
"""
|
|
871
1045
|
Load model profiles from a jsonl file and extend `CATALOG` with the new profiles.
|
|
872
1046
|
|
|
@@ -877,10 +1051,12 @@ class ModelCatalog:
|
|
|
877
1051
|
with jsonlines.open(path) as reader:
|
|
878
1052
|
for obj in reader:
|
|
879
1053
|
if not obj["name"] in ModelCatalog.CATALOG:
|
|
1054
|
+
categories = obj.get("categories") or {}
|
|
1055
|
+
obj["categories"] = {int(key): get_type(val) for key, val in categories.items()}
|
|
880
1056
|
ModelCatalog.register(obj["name"], ModelProfile(**obj))
|
|
881
1057
|
|
|
882
1058
|
@staticmethod
|
|
883
|
-
def save_profiles_to_file(target_path:
|
|
1059
|
+
def save_profiles_to_file(target_path: PathLikeOrStr) -> None:
|
|
884
1060
|
"""
|
|
885
1061
|
Save model profiles to a jsonl file.
|
|
886
1062
|
|
|
@@ -896,7 +1072,7 @@ class ModelCatalog:
|
|
|
896
1072
|
ModelCatalog.load_profiles_from_file(os.environ.get("MODEL_CATALOG", None))
|
|
897
1073
|
|
|
898
1074
|
|
|
899
|
-
def get_tp_weight_names(name: str) ->
|
|
1075
|
+
def get_tp_weight_names(name: str) -> list[str]:
|
|
900
1076
|
"""
|
|
901
1077
|
Given a path to some model weights it will return all file names according to TP naming convention
|
|
902
1078
|
|
|
@@ -922,7 +1098,7 @@ def print_model_infos(add_description: bool = True, add_config: bool = True, add
|
|
|
922
1098
|
num_columns = min(6, len(profiles))
|
|
923
1099
|
infos = []
|
|
924
1100
|
for profile in profiles:
|
|
925
|
-
tbl_input:
|
|
1101
|
+
tbl_input: list[Union[Mapping[int, ObjectTypes], str]] = [profile.name]
|
|
926
1102
|
if add_description:
|
|
927
1103
|
tbl_input.append(profile.description)
|
|
928
1104
|
if add_config:
|
|
@@ -957,7 +1133,7 @@ class ModelDownloadManager:
|
|
|
957
1133
|
"""
|
|
958
1134
|
|
|
959
1135
|
@staticmethod
|
|
960
|
-
def maybe_download_weights_and_configs(name: str) ->
|
|
1136
|
+
def maybe_download_weights_and_configs(name: str) -> PathLikeOrStr:
|
|
961
1137
|
"""
|
|
962
1138
|
Check if some model is registered. If yes, it will check if their weights
|
|
963
1139
|
must be downloaded. Only weights that have not the same expected size will be downloaded again.
|
|
@@ -967,7 +1143,7 @@ class ModelDownloadManager:
|
|
|
967
1143
|
"""
|
|
968
1144
|
|
|
969
1145
|
absolute_path_weights = ModelCatalog.get_full_path_weights(name)
|
|
970
|
-
file_names:
|
|
1146
|
+
file_names: list[str] = []
|
|
971
1147
|
if ModelCatalog.is_registered(name):
|
|
972
1148
|
profile = ModelCatalog.get_profile(name)
|
|
973
1149
|
# there is nothing to download if hf_repo_id or urls is not provided
|
|
@@ -1000,7 +1176,7 @@ class ModelDownloadManager:
|
|
|
1000
1176
|
return absolute_path_weights
|
|
1001
1177
|
|
|
1002
1178
|
@staticmethod
|
|
1003
|
-
def load_model_from_hf_hub(profile: ModelProfile, absolute_path:
|
|
1179
|
+
def load_model_from_hf_hub(profile: ModelProfile, absolute_path: PathLikeOrStr, file_names: list[str]) -> None:
|
|
1004
1180
|
"""
|
|
1005
1181
|
Load a model from the Huggingface hub for a given profile and saves the model at the directory of the given
|
|
1006
1182
|
path.
|
|
@@ -1026,7 +1202,7 @@ class ModelDownloadManager:
|
|
|
1026
1202
|
)
|
|
1027
1203
|
|
|
1028
1204
|
@staticmethod
|
|
1029
|
-
def _load_from_gd(profile: ModelProfile, absolute_path:
|
|
1205
|
+
def _load_from_gd(profile: ModelProfile, absolute_path: PathLikeOrStr, file_names: list[str]) -> None:
|
|
1030
1206
|
if profile.urls is None:
|
|
1031
1207
|
raise ValueError("urls cannot be None")
|
|
1032
1208
|
for size, url, file_name in zip(profile.size, profile.urls, file_names):
|
|
@@ -1034,7 +1210,7 @@ class ModelDownloadManager:
|
|
|
1034
1210
|
download(str(url), directory, file_name, int(size))
|
|
1035
1211
|
|
|
1036
1212
|
@staticmethod
|
|
1037
|
-
def load_configs_from_hf_hub(profile: ModelProfile, absolute_path:
|
|
1213
|
+
def load_configs_from_hf_hub(profile: ModelProfile, absolute_path: PathLikeOrStr) -> None:
|
|
1038
1214
|
"""
|
|
1039
1215
|
Load config file(s) from the Huggingface hub for a given profile and saves the model at the directory of the
|
|
1040
1216
|
given path.
|
|
@@ -1053,9 +1229,11 @@ class ModelDownloadManager:
|
|
|
1053
1229
|
ModelDownloadManager._load_from_hf_hub(repo_id, file_name, directory)
|
|
1054
1230
|
|
|
1055
1231
|
@staticmethod
|
|
1056
|
-
def _load_from_hf_hub(
|
|
1232
|
+
def _load_from_hf_hub(
|
|
1233
|
+
repo_id: str, file_name: str, cache_directory: PathLikeOrStr, force_download: bool = False
|
|
1234
|
+
) -> int:
|
|
1057
1235
|
url = hf_hub_url(repo_id=repo_id, filename=file_name)
|
|
1058
|
-
token = os.environ.get("HF_CREDENTIALS")
|
|
1236
|
+
token = os.environ.get("HF_CREDENTIALS", None)
|
|
1059
1237
|
f_path = cached_download(
|
|
1060
1238
|
url,
|
|
1061
1239
|
cache_dir=cache_directory,
|