deepdoctection 0.31__py3-none-any.whl → 0.33__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of deepdoctection might be problematic. Click here for more details.
- deepdoctection/__init__.py +16 -29
- deepdoctection/analyzer/dd.py +70 -59
- deepdoctection/configs/conf_dd_one.yaml +34 -31
- deepdoctection/dataflow/common.py +9 -5
- deepdoctection/dataflow/custom.py +5 -5
- deepdoctection/dataflow/custom_serialize.py +75 -18
- deepdoctection/dataflow/parallel_map.py +3 -3
- deepdoctection/dataflow/serialize.py +4 -4
- deepdoctection/dataflow/stats.py +3 -3
- deepdoctection/datapoint/annotation.py +41 -56
- deepdoctection/datapoint/box.py +9 -8
- deepdoctection/datapoint/convert.py +6 -6
- deepdoctection/datapoint/image.py +56 -44
- deepdoctection/datapoint/view.py +245 -150
- deepdoctection/datasets/__init__.py +1 -4
- deepdoctection/datasets/adapter.py +35 -26
- deepdoctection/datasets/base.py +14 -12
- deepdoctection/datasets/dataflow_builder.py +3 -3
- deepdoctection/datasets/info.py +24 -26
- deepdoctection/datasets/instances/doclaynet.py +51 -51
- deepdoctection/datasets/instances/fintabnet.py +46 -46
- deepdoctection/datasets/instances/funsd.py +25 -24
- deepdoctection/datasets/instances/iiitar13k.py +13 -10
- deepdoctection/datasets/instances/layouttest.py +4 -3
- deepdoctection/datasets/instances/publaynet.py +5 -5
- deepdoctection/datasets/instances/pubtables1m.py +24 -21
- deepdoctection/datasets/instances/pubtabnet.py +32 -30
- deepdoctection/datasets/instances/rvlcdip.py +30 -30
- deepdoctection/datasets/instances/xfund.py +26 -26
- deepdoctection/datasets/save.py +6 -6
- deepdoctection/eval/__init__.py +1 -4
- deepdoctection/eval/accmetric.py +32 -33
- deepdoctection/eval/base.py +8 -9
- deepdoctection/eval/cocometric.py +15 -13
- deepdoctection/eval/eval.py +41 -37
- deepdoctection/eval/tedsmetric.py +30 -23
- deepdoctection/eval/tp_eval_callback.py +16 -19
- deepdoctection/extern/__init__.py +2 -7
- deepdoctection/extern/base.py +339 -134
- deepdoctection/extern/d2detect.py +85 -113
- deepdoctection/extern/deskew.py +14 -11
- deepdoctection/extern/doctrocr.py +141 -130
- deepdoctection/extern/fastlang.py +27 -18
- deepdoctection/extern/hfdetr.py +71 -62
- deepdoctection/extern/hflayoutlm.py +504 -211
- deepdoctection/extern/hflm.py +230 -0
- deepdoctection/extern/model.py +488 -302
- deepdoctection/extern/pdftext.py +23 -19
- deepdoctection/extern/pt/__init__.py +1 -3
- deepdoctection/extern/pt/nms.py +6 -2
- deepdoctection/extern/pt/ptutils.py +29 -19
- deepdoctection/extern/tessocr.py +39 -38
- deepdoctection/extern/texocr.py +18 -18
- deepdoctection/extern/tp/tfutils.py +57 -9
- deepdoctection/extern/tp/tpcompat.py +21 -14
- deepdoctection/extern/tp/tpfrcnn/__init__.py +20 -0
- deepdoctection/extern/tp/tpfrcnn/common.py +7 -3
- deepdoctection/extern/tp/tpfrcnn/config/__init__.py +20 -0
- deepdoctection/extern/tp/tpfrcnn/config/config.py +13 -10
- deepdoctection/extern/tp/tpfrcnn/modeling/__init__.py +20 -0
- deepdoctection/extern/tp/tpfrcnn/modeling/backbone.py +18 -8
- deepdoctection/extern/tp/tpfrcnn/modeling/generalized_rcnn.py +12 -6
- deepdoctection/extern/tp/tpfrcnn/modeling/model_box.py +14 -9
- deepdoctection/extern/tp/tpfrcnn/modeling/model_cascade.py +8 -5
- deepdoctection/extern/tp/tpfrcnn/modeling/model_fpn.py +22 -17
- deepdoctection/extern/tp/tpfrcnn/modeling/model_frcnn.py +21 -14
- deepdoctection/extern/tp/tpfrcnn/modeling/model_mrcnn.py +19 -11
- deepdoctection/extern/tp/tpfrcnn/modeling/model_rpn.py +15 -10
- deepdoctection/extern/tp/tpfrcnn/predict.py +9 -4
- deepdoctection/extern/tp/tpfrcnn/preproc.py +12 -8
- deepdoctection/extern/tp/tpfrcnn/utils/__init__.py +20 -0
- deepdoctection/extern/tp/tpfrcnn/utils/box_ops.py +10 -2
- deepdoctection/extern/tpdetect.py +45 -53
- deepdoctection/mapper/__init__.py +3 -8
- deepdoctection/mapper/cats.py +27 -29
- deepdoctection/mapper/cocostruct.py +10 -10
- deepdoctection/mapper/d2struct.py +27 -26
- deepdoctection/mapper/hfstruct.py +13 -8
- deepdoctection/mapper/laylmstruct.py +178 -37
- deepdoctection/mapper/maputils.py +12 -11
- deepdoctection/mapper/match.py +2 -2
- deepdoctection/mapper/misc.py +11 -9
- deepdoctection/mapper/pascalstruct.py +4 -4
- deepdoctection/mapper/prodigystruct.py +5 -5
- deepdoctection/mapper/pubstruct.py +84 -92
- deepdoctection/mapper/tpstruct.py +5 -5
- deepdoctection/mapper/xfundstruct.py +33 -33
- deepdoctection/pipe/__init__.py +1 -1
- deepdoctection/pipe/anngen.py +12 -14
- deepdoctection/pipe/base.py +52 -106
- deepdoctection/pipe/common.py +72 -59
- deepdoctection/pipe/concurrency.py +16 -11
- deepdoctection/pipe/doctectionpipe.py +24 -21
- deepdoctection/pipe/language.py +20 -25
- deepdoctection/pipe/layout.py +20 -16
- deepdoctection/pipe/lm.py +75 -105
- deepdoctection/pipe/order.py +194 -89
- deepdoctection/pipe/refine.py +111 -124
- deepdoctection/pipe/segment.py +156 -161
- deepdoctection/pipe/{cell.py → sub_layout.py} +50 -40
- deepdoctection/pipe/text.py +37 -36
- deepdoctection/pipe/transform.py +19 -16
- deepdoctection/train/__init__.py +6 -12
- deepdoctection/train/d2_frcnn_train.py +48 -41
- deepdoctection/train/hf_detr_train.py +41 -30
- deepdoctection/train/hf_layoutlm_train.py +153 -135
- deepdoctection/train/tp_frcnn_train.py +32 -31
- deepdoctection/utils/concurrency.py +1 -1
- deepdoctection/utils/context.py +13 -6
- deepdoctection/utils/develop.py +4 -4
- deepdoctection/utils/env_info.py +87 -125
- deepdoctection/utils/file_utils.py +6 -11
- deepdoctection/utils/fs.py +22 -18
- deepdoctection/utils/identifier.py +2 -2
- deepdoctection/utils/logger.py +16 -15
- deepdoctection/utils/metacfg.py +7 -7
- deepdoctection/utils/mocks.py +93 -0
- deepdoctection/utils/pdf_utils.py +11 -11
- deepdoctection/utils/settings.py +185 -181
- deepdoctection/utils/tqdm.py +1 -1
- deepdoctection/utils/transform.py +14 -9
- deepdoctection/utils/types.py +104 -0
- deepdoctection/utils/utils.py +7 -7
- deepdoctection/utils/viz.py +74 -72
- {deepdoctection-0.31.dist-info → deepdoctection-0.33.dist-info}/METADATA +30 -21
- deepdoctection-0.33.dist-info/RECORD +146 -0
- {deepdoctection-0.31.dist-info → deepdoctection-0.33.dist-info}/WHEEL +1 -1
- deepdoctection/utils/detection_types.py +0 -68
- deepdoctection-0.31.dist-info/RECORD +0 -144
- {deepdoctection-0.31.dist-info → deepdoctection-0.33.dist-info}/LICENSE +0 -0
- {deepdoctection-0.31.dist-info → deepdoctection-0.33.dist-info}/top_level.txt +0 -0
deepdoctection/extern/model.py
CHANGED
|
@@ -20,9 +20,8 @@ Module for ModelCatalog and ModelDownloadManager
|
|
|
20
20
|
"""
|
|
21
21
|
|
|
22
22
|
import os
|
|
23
|
-
from copy import copy
|
|
24
23
|
from dataclasses import asdict, dataclass, field
|
|
25
|
-
from typing import Any,
|
|
24
|
+
from typing import Any, Mapping, Optional, Union
|
|
26
25
|
|
|
27
26
|
import jsonlines
|
|
28
27
|
from huggingface_hub import cached_download, hf_hub_url # type: ignore
|
|
@@ -32,11 +31,12 @@ from termcolor import colored
|
|
|
32
31
|
from ..utils.fs import download, get_configs_dir_path, get_weights_dir_path
|
|
33
32
|
from ..utils.logger import LoggingRecord, log_once, logger
|
|
34
33
|
from ..utils.settings import CellType, Languages, LayoutType, ObjectTypes, get_type
|
|
34
|
+
from ..utils.types import PathLikeOrStr
|
|
35
35
|
|
|
36
36
|
__all__ = ["ModelCatalog", "ModelDownloadManager", "print_model_infos", "ModelProfile"]
|
|
37
37
|
|
|
38
38
|
|
|
39
|
-
@dataclass
|
|
39
|
+
@dataclass(frozen=True)
|
|
40
40
|
class ModelProfile:
|
|
41
41
|
"""
|
|
42
42
|
Class for model profile. Add for each model one ModelProfile to the ModelCatalog
|
|
@@ -45,25 +45,21 @@ class ModelProfile:
|
|
|
45
45
|
name: str
|
|
46
46
|
description: str
|
|
47
47
|
|
|
48
|
-
size:
|
|
48
|
+
size: list[int]
|
|
49
49
|
tp_model: bool = field(default=False)
|
|
50
50
|
config: Optional[str] = field(default=None)
|
|
51
51
|
preprocessor_config: Optional[str] = field(default=None)
|
|
52
52
|
hf_repo_id: Optional[str] = field(default=None)
|
|
53
53
|
hf_model_name: Optional[str] = field(default=None)
|
|
54
|
-
hf_config_file: Optional[
|
|
55
|
-
urls: Optional[
|
|
56
|
-
categories: Optional[
|
|
54
|
+
hf_config_file: Optional[list[str]] = field(default=None)
|
|
55
|
+
urls: Optional[list[str]] = field(default=None)
|
|
56
|
+
categories: Optional[Mapping[int, ObjectTypes]] = field(default=None)
|
|
57
|
+
categories_orig: Optional[Mapping[str, ObjectTypes]] = field(default=None)
|
|
57
58
|
dl_library: Optional[str] = field(default=None)
|
|
58
59
|
model_wrapper: Optional[str] = field(default=None)
|
|
59
60
|
architecture: Optional[str] = field(default=None)
|
|
60
61
|
|
|
61
|
-
def
|
|
62
|
-
"""updating categories to ObjectTypes. This might be necessary if we load a catalog from a file"""
|
|
63
|
-
if self.categories:
|
|
64
|
-
self.categories = {key: get_type(val) for key, val in self.categories.items()}
|
|
65
|
-
|
|
66
|
-
def as_dict(self) -> Dict[str, Any]:
|
|
62
|
+
def as_dict(self) -> dict[str, Any]:
|
|
67
63
|
"""
|
|
68
64
|
returns a dict of the dataclass
|
|
69
65
|
"""
|
|
@@ -94,7 +90,7 @@ class ModelCatalog:
|
|
|
94
90
|
ModelCatalog.get_full_path_configs("my_new_model")
|
|
95
91
|
"""
|
|
96
92
|
|
|
97
|
-
CATALOG:
|
|
93
|
+
CATALOG: dict[str, ModelProfile] = {
|
|
98
94
|
"layout/model-800000_inf_only.data-00000-of-00001": ModelProfile(
|
|
99
95
|
name="layout/model-800000_inf_only.data-00000-of-00001",
|
|
100
96
|
description="Tensorpack layout model for inference purposes trained on Publaynet",
|
|
@@ -105,11 +101,11 @@ class ModelCatalog:
|
|
|
105
101
|
hf_model_name="model-800000_inf_only",
|
|
106
102
|
hf_config_file=["conf_frcnn_layout.yaml"],
|
|
107
103
|
categories={
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
104
|
+
1: LayoutType.TEXT,
|
|
105
|
+
2: LayoutType.TITLE,
|
|
106
|
+
3: LayoutType.LIST,
|
|
107
|
+
4: LayoutType.TABLE,
|
|
108
|
+
5: LayoutType.FIGURE,
|
|
113
109
|
},
|
|
114
110
|
dl_library="TF",
|
|
115
111
|
model_wrapper="TPFrcnnDetector",
|
|
@@ -123,7 +119,7 @@ class ModelCatalog:
|
|
|
123
119
|
hf_repo_id="deepdoctection/tp_casc_rcnn_X_32xd4_50_FPN_GN_2FC_pubtabnet_c_inference_only",
|
|
124
120
|
hf_model_name="model-1800000_inf_only",
|
|
125
121
|
hf_config_file=["conf_frcnn_cell.yaml"],
|
|
126
|
-
categories={
|
|
122
|
+
categories={1: LayoutType.CELL},
|
|
127
123
|
dl_library="TF",
|
|
128
124
|
model_wrapper="TPFrcnnDetector",
|
|
129
125
|
),
|
|
@@ -136,7 +132,7 @@ class ModelCatalog:
|
|
|
136
132
|
hf_repo_id="deepdoctection/tp_casc_rcnn_X_32xd4_50_FPN_GN_2FC_pubtabnet_rc_inference_only",
|
|
137
133
|
hf_model_name="model-1620000_inf_only",
|
|
138
134
|
hf_config_file=["conf_frcnn_rows.yaml"],
|
|
139
|
-
categories={
|
|
135
|
+
categories={1: LayoutType.ROW, 2: LayoutType.COLUMN},
|
|
140
136
|
dl_library="TF",
|
|
141
137
|
model_wrapper="TPFrcnnDetector",
|
|
142
138
|
),
|
|
@@ -149,7 +145,7 @@ class ModelCatalog:
|
|
|
149
145
|
hf_repo_id="deepdoctection/tp_casc_rcnn_X_32xd4_50_FPN_GN_2FC_pubtabnet_rc",
|
|
150
146
|
hf_model_name="model-1620000",
|
|
151
147
|
hf_config_file=["conf_frcnn_rows.yaml"],
|
|
152
|
-
categories={
|
|
148
|
+
categories={1: LayoutType.ROW, 2: LayoutType.COLUMN},
|
|
153
149
|
dl_library="TF",
|
|
154
150
|
model_wrapper="TPFrcnnDetector",
|
|
155
151
|
),
|
|
@@ -164,11 +160,11 @@ class ModelCatalog:
|
|
|
164
160
|
hf_config_file=["conf_frcnn_layout.yaml"],
|
|
165
161
|
dl_library="TF",
|
|
166
162
|
categories={
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
163
|
+
1: LayoutType.TEXT,
|
|
164
|
+
2: LayoutType.TITLE,
|
|
165
|
+
3: LayoutType.LIST,
|
|
166
|
+
4: LayoutType.TABLE,
|
|
167
|
+
5: LayoutType.FIGURE,
|
|
172
168
|
},
|
|
173
169
|
model_wrapper="TPFrcnnDetector",
|
|
174
170
|
),
|
|
@@ -181,29 +177,10 @@ class ModelCatalog:
|
|
|
181
177
|
hf_repo_id="deepdoctection/tp_casc_rcnn_X_32xd4_50_FPN_GN_2FC_pubtabnet_c",
|
|
182
178
|
hf_model_name="model-1800000",
|
|
183
179
|
hf_config_file=["conf_frcnn_cell.yaml"],
|
|
184
|
-
categories={
|
|
180
|
+
categories={1: LayoutType.CELL},
|
|
185
181
|
dl_library="TF",
|
|
186
182
|
model_wrapper="TPFrcnnDetector",
|
|
187
183
|
),
|
|
188
|
-
"layout/d2_model-800000-layout.pkl": ModelProfile(
|
|
189
|
-
name="layout/d2_model-800000-layout.pkl",
|
|
190
|
-
description="Detectron2 layout detection model trained on Publaynet",
|
|
191
|
-
config="dd/d2/layout/CASCADE_RCNN_R_50_FPN_GN.yaml",
|
|
192
|
-
size=[274568239],
|
|
193
|
-
tp_model=False,
|
|
194
|
-
hf_repo_id="deepdoctection/d2_casc_rcnn_X_32xd4_50_FPN_GN_2FC_publaynet_inference_only",
|
|
195
|
-
hf_model_name="d2_model-800000-layout.pkl",
|
|
196
|
-
hf_config_file=["Base-RCNN-FPN.yaml", "CASCADE_RCNN_R_50_FPN_GN.yaml"],
|
|
197
|
-
categories={
|
|
198
|
-
"1": LayoutType.text,
|
|
199
|
-
"2": LayoutType.title,
|
|
200
|
-
"3": LayoutType.list,
|
|
201
|
-
"4": LayoutType.table,
|
|
202
|
-
"5": LayoutType.figure,
|
|
203
|
-
},
|
|
204
|
-
dl_library="PT",
|
|
205
|
-
model_wrapper="D2FrcnnDetector",
|
|
206
|
-
),
|
|
207
184
|
"layout/d2_model_0829999_layout_inf_only.pt": ModelProfile(
|
|
208
185
|
name="layout/d2_model_0829999_layout_inf_only.pt",
|
|
209
186
|
description="Detectron2 layout detection model trained on Publaynet",
|
|
@@ -214,11 +191,11 @@ class ModelCatalog:
|
|
|
214
191
|
hf_model_name="d2_model_0829999_layout_inf_only.pt",
|
|
215
192
|
hf_config_file=["Base-RCNN-FPN.yaml", "CASCADE_RCNN_R_50_FPN_GN.yaml"],
|
|
216
193
|
categories={
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
|
|
220
|
-
|
|
221
|
-
|
|
194
|
+
1: LayoutType.TEXT,
|
|
195
|
+
2: LayoutType.TITLE,
|
|
196
|
+
3: LayoutType.LIST,
|
|
197
|
+
4: LayoutType.TABLE,
|
|
198
|
+
5: LayoutType.FIGURE,
|
|
222
199
|
},
|
|
223
200
|
dl_library="PT",
|
|
224
201
|
model_wrapper="D2FrcnnDetector",
|
|
@@ -233,11 +210,11 @@ class ModelCatalog:
|
|
|
233
210
|
hf_model_name="d2_model_0829999_layout.pth",
|
|
234
211
|
hf_config_file=["Base-RCNN-FPN.yaml", "CASCADE_RCNN_R_50_FPN_GN.yaml"],
|
|
235
212
|
categories={
|
|
236
|
-
|
|
237
|
-
|
|
238
|
-
|
|
239
|
-
|
|
240
|
-
|
|
213
|
+
1: LayoutType.TEXT,
|
|
214
|
+
2: LayoutType.TITLE,
|
|
215
|
+
3: LayoutType.LIST,
|
|
216
|
+
4: LayoutType.TABLE,
|
|
217
|
+
5: LayoutType.FIGURE,
|
|
241
218
|
},
|
|
242
219
|
dl_library="PT",
|
|
243
220
|
model_wrapper="D2FrcnnDetector",
|
|
@@ -252,28 +229,15 @@ class ModelCatalog:
|
|
|
252
229
|
hf_model_name="d2_model_0829999_layout_inf_only.ts",
|
|
253
230
|
hf_config_file=["CASCADE_RCNN_R_50_FPN_GN_TS.yaml"],
|
|
254
231
|
categories={
|
|
255
|
-
|
|
256
|
-
|
|
257
|
-
|
|
258
|
-
|
|
259
|
-
|
|
232
|
+
1: LayoutType.TEXT,
|
|
233
|
+
2: LayoutType.TITLE,
|
|
234
|
+
3: LayoutType.LIST,
|
|
235
|
+
4: LayoutType.TABLE,
|
|
236
|
+
5: LayoutType.FIGURE,
|
|
260
237
|
},
|
|
261
238
|
dl_library="PT",
|
|
262
239
|
model_wrapper="D2FrcnnTracingDetector",
|
|
263
240
|
),
|
|
264
|
-
"cell/d2_model-1800000-cell.pkl": ModelProfile(
|
|
265
|
-
name="cell/d2_model-1800000-cell.pkl",
|
|
266
|
-
description="Detectron2 cell detection inference only model trained on Pubtabnet",
|
|
267
|
-
config="dd/d2/cell/CASCADE_RCNN_R_50_FPN_GN.yaml",
|
|
268
|
-
size=[274519039],
|
|
269
|
-
tp_model=False,
|
|
270
|
-
hf_repo_id="deepdoctection/d2_casc_rcnn_X_32xd4_50_FPN_GN_2FC_pubtabnet_c_inference_only",
|
|
271
|
-
hf_model_name="d2_model-1800000-cell.pkl",
|
|
272
|
-
hf_config_file=["Base-RCNN-FPN.yaml", "CASCADE_RCNN_R_50_FPN_GN.yaml"],
|
|
273
|
-
categories={"1": LayoutType.cell},
|
|
274
|
-
dl_library="PT",
|
|
275
|
-
model_wrapper="D2FrcnnDetector",
|
|
276
|
-
),
|
|
277
241
|
"cell/d2_model_1849999_cell_inf_only.pt": ModelProfile(
|
|
278
242
|
name="cell/d2_model_1849999_cell_inf_only.pt",
|
|
279
243
|
description="Detectron2 cell detection inference only model trained on Pubtabnet",
|
|
@@ -283,7 +247,7 @@ class ModelCatalog:
|
|
|
283
247
|
hf_repo_id="deepdoctection/d2_casc_rcnn_X_32xd4_50_FPN_GN_2FC_pubtabnet_c_inference_only",
|
|
284
248
|
hf_model_name="d2_model_1849999_cell_inf_only.pt",
|
|
285
249
|
hf_config_file=["Base-RCNN-FPN.yaml", "CASCADE_RCNN_R_50_FPN_GN.yaml"],
|
|
286
|
-
categories={
|
|
250
|
+
categories={1: LayoutType.CELL},
|
|
287
251
|
dl_library="PT",
|
|
288
252
|
model_wrapper="D2FrcnnDetector",
|
|
289
253
|
),
|
|
@@ -296,7 +260,7 @@ class ModelCatalog:
|
|
|
296
260
|
hf_repo_id="deepdoctection/d2_casc_rcnn_X_32xd4_50_FPN_GN_2FC_pubtabnet_c_inference_only",
|
|
297
261
|
hf_model_name="d2_model_1849999_cell_inf_only.ts",
|
|
298
262
|
hf_config_file=["CASCADE_RCNN_R_50_FPN_GN_TS.yaml"],
|
|
299
|
-
categories={
|
|
263
|
+
categories={1: LayoutType.CELL},
|
|
300
264
|
dl_library="PT",
|
|
301
265
|
model_wrapper="D2FrcnnTracingDetector",
|
|
302
266
|
),
|
|
@@ -309,20 +273,7 @@ class ModelCatalog:
|
|
|
309
273
|
hf_repo_id="deepdoctection/d2_casc_rcnn_X_32xd4_50_FPN_GN_2FC_pubtabnet_c_inference_only",
|
|
310
274
|
hf_model_name="cell/d2_model_1849999_cell.pth",
|
|
311
275
|
hf_config_file=["Base-RCNN-FPN.yaml", "CASCADE_RCNN_R_50_FPN_GN.yaml"],
|
|
312
|
-
categories={
|
|
313
|
-
dl_library="PT",
|
|
314
|
-
model_wrapper="D2FrcnnDetector",
|
|
315
|
-
),
|
|
316
|
-
"item/d2_model-1620000-item.pkl": ModelProfile(
|
|
317
|
-
name="item/d2_model-1620000-item.pkl",
|
|
318
|
-
description="Detectron2 item detection inference only model trained on Pubtabnet",
|
|
319
|
-
config="dd/d2/item/CASCADE_RCNN_R_50_FPN_GN.yaml",
|
|
320
|
-
size=[274531339],
|
|
321
|
-
tp_model=False,
|
|
322
|
-
hf_repo_id="deepdoctection/d2_casc_rcnn_X_32xd4_50_FPN_GN_2FC_pubtabnet_rc_inference_only",
|
|
323
|
-
hf_model_name="d2_model-1620000-item.pkl",
|
|
324
|
-
hf_config_file=["Base-RCNN-FPN.yaml", "CASCADE_RCNN_R_50_FPN_GN.yaml"],
|
|
325
|
-
categories={"1": LayoutType.row, "2": LayoutType.column},
|
|
276
|
+
categories={1: LayoutType.CELL},
|
|
326
277
|
dl_library="PT",
|
|
327
278
|
model_wrapper="D2FrcnnDetector",
|
|
328
279
|
),
|
|
@@ -335,7 +286,7 @@ class ModelCatalog:
|
|
|
335
286
|
hf_repo_id="deepdoctection/d2_casc_rcnn_X_32xd4_50_FPN_GN_2FC_pubtabnet_rc_inference_only",
|
|
336
287
|
hf_model_name="d2_model_1639999_item.pth",
|
|
337
288
|
hf_config_file=["Base-RCNN-FPN.yaml", "CASCADE_RCNN_R_50_FPN_GN.yaml"],
|
|
338
|
-
categories={
|
|
289
|
+
categories={1: LayoutType.ROW, 2: LayoutType.COLUMN},
|
|
339
290
|
dl_library="PT",
|
|
340
291
|
model_wrapper="D2FrcnnDetector",
|
|
341
292
|
),
|
|
@@ -348,7 +299,7 @@ class ModelCatalog:
|
|
|
348
299
|
hf_repo_id="deepdoctection/d2_casc_rcnn_X_32xd4_50_FPN_GN_2FC_pubtabnet_rc_inference_only",
|
|
349
300
|
hf_model_name="d2_model_1639999_item_inf_only.pt",
|
|
350
301
|
hf_config_file=["Base-RCNN-FPN.yaml", "CASCADE_RCNN_R_50_FPN_GN.yaml"],
|
|
351
|
-
categories={
|
|
302
|
+
categories={1: LayoutType.ROW, 2: LayoutType.COLUMN},
|
|
352
303
|
dl_library="PT",
|
|
353
304
|
model_wrapper="D2FrcnnDetector",
|
|
354
305
|
),
|
|
@@ -361,10 +312,49 @@ class ModelCatalog:
|
|
|
361
312
|
hf_repo_id="deepdoctection/d2_casc_rcnn_X_32xd4_50_FPN_GN_2FC_pubtabnet_rc_inference_only",
|
|
362
313
|
hf_model_name="d2_model_1639999_item_inf_only.ts",
|
|
363
314
|
hf_config_file=["CASCADE_RCNN_R_50_FPN_GN_TS.yaml"],
|
|
364
|
-
categories={
|
|
315
|
+
categories={1: LayoutType.ROW, 2: LayoutType.COLUMN},
|
|
365
316
|
dl_library="PT",
|
|
366
317
|
model_wrapper="D2FrcnnTracingDetector",
|
|
367
318
|
),
|
|
319
|
+
"nielsr/lilt-xlm-roberta-base/pytorch_model.bin": ModelProfile(
|
|
320
|
+
name="nielsr/lilt-xlm-roberta-base/pytorch_model.bin",
|
|
321
|
+
description="LiLT build with a RobertaXLM base model",
|
|
322
|
+
config="nielsr/lilt-xlm-roberta-base/config.json",
|
|
323
|
+
size=[1136743583],
|
|
324
|
+
tp_model=False,
|
|
325
|
+
hf_repo_id="nielsr/lilt-xlm-roberta-base",
|
|
326
|
+
hf_model_name="pytorch_model.bin",
|
|
327
|
+
hf_config_file=["config.json"],
|
|
328
|
+
dl_library="PT",
|
|
329
|
+
),
|
|
330
|
+
"SCUT-DLVCLab/lilt-infoxlm-base/pytorch_model.bin": ModelProfile(
|
|
331
|
+
name="SCUT-DLVCLab/lilt-infoxlm-base/pytorch_model.bin",
|
|
332
|
+
description="Language-Independent Layout Transformer - InfoXLM model by stitching a pre-trained InfoXLM"
|
|
333
|
+
" and a pre-trained Language-Independent Layout Transformer (LiLT) together. It was introduced"
|
|
334
|
+
" in the paper LiLT: A Simple yet Effective Language-Independent Layout Transformer for"
|
|
335
|
+
" Structured Document Understanding by Wang et al. and first released in this repository.",
|
|
336
|
+
config="SCUT-DLVCLab/lilt-infoxlm-base/config.json",
|
|
337
|
+
size=[1136743583],
|
|
338
|
+
tp_model=False,
|
|
339
|
+
hf_repo_id="SCUT-DLVCLab/lilt-infoxlm-base",
|
|
340
|
+
hf_model_name="pytorch_model.bin",
|
|
341
|
+
hf_config_file=["config.json"],
|
|
342
|
+
dl_library="PT",
|
|
343
|
+
),
|
|
344
|
+
"SCUT-DLVCLab/lilt-roberta-en-base/pytorch_model.bin": ModelProfile(
|
|
345
|
+
name="SCUT-DLVCLab/lilt-roberta-en-base/pytorch_model.bin",
|
|
346
|
+
description="Language-Independent Layout Transformer - RoBERTa model by stitching a pre-trained RoBERTa"
|
|
347
|
+
" (English) and a pre-trained Language-Independent Layout Transformer (LiLT) together. It was"
|
|
348
|
+
" introduced in the paper LiLT: A Simple yet Effective Language-Independent Layout Transformer"
|
|
349
|
+
" for Structured Document Understanding by Wang et al. and first released in this repository.",
|
|
350
|
+
config="SCUT-DLVCLab/lilt-roberta-en-base/config.json",
|
|
351
|
+
size=[523151519],
|
|
352
|
+
tp_model=False,
|
|
353
|
+
hf_repo_id="SCUT-DLVCLab/lilt-roberta-en-base",
|
|
354
|
+
hf_model_name="pytorch_model.bin",
|
|
355
|
+
hf_config_file=["config.json"],
|
|
356
|
+
dl_library="PT",
|
|
357
|
+
),
|
|
368
358
|
"microsoft/layoutlm-base-uncased/pytorch_model.bin": ModelProfile(
|
|
369
359
|
name="microsoft/layoutlm-base-uncased/pytorch_model.bin",
|
|
370
360
|
description="LayoutLM is a simple but effective pre-training method of text and layout for document image"
|
|
@@ -459,7 +449,7 @@ class ModelCatalog:
|
|
|
459
449
|
hf_repo_id="microsoft/table-transformer-detection",
|
|
460
450
|
hf_model_name="pytorch_model.bin",
|
|
461
451
|
hf_config_file=["config.json", "preprocessor_config.json"],
|
|
462
|
-
categories={
|
|
452
|
+
categories={1: LayoutType.TABLE, 2: LayoutType.TABLE_ROTATED},
|
|
463
453
|
dl_library="PT",
|
|
464
454
|
model_wrapper="HFDetrDerivedDetector",
|
|
465
455
|
),
|
|
@@ -477,12 +467,12 @@ class ModelCatalog:
|
|
|
477
467
|
hf_model_name="pytorch_model.bin",
|
|
478
468
|
hf_config_file=["config.json", "preprocessor_config.json"],
|
|
479
469
|
categories={
|
|
480
|
-
|
|
481
|
-
|
|
482
|
-
|
|
483
|
-
|
|
484
|
-
|
|
485
|
-
|
|
470
|
+
1: LayoutType.TABLE,
|
|
471
|
+
2: LayoutType.COLUMN,
|
|
472
|
+
3: LayoutType.ROW,
|
|
473
|
+
4: CellType.COLUMN_HEADER,
|
|
474
|
+
5: CellType.PROJECTED_ROW_HEADER,
|
|
475
|
+
6: CellType.SPANNING,
|
|
486
476
|
},
|
|
487
477
|
dl_library="PT",
|
|
488
478
|
model_wrapper="HFDetrDerivedDetector",
|
|
@@ -494,7 +484,7 @@ class ModelCatalog:
|
|
|
494
484
|
"https://mindee.github.io/doctr/using_doctr/using_models.html#. This is the Pytorch artefact.",
|
|
495
485
|
size=[101971449],
|
|
496
486
|
urls=["https://doctr-static.mindee.com/models?id=v0.3.1/db_resnet50-ac60cadc.pt&src=0"],
|
|
497
|
-
categories={
|
|
487
|
+
categories={1: LayoutType.WORD},
|
|
498
488
|
dl_library="PT",
|
|
499
489
|
model_wrapper="DoctrTextlineDetector",
|
|
500
490
|
architecture="db_resnet50",
|
|
@@ -506,7 +496,7 @@ class ModelCatalog:
|
|
|
506
496
|
"https://mindee.github.io/doctr/using_doctr/using_models.html#. This is the Tensorflow artefact.",
|
|
507
497
|
size=[94178964],
|
|
508
498
|
urls=["https://doctr-static.mindee.com/models?id=v0.2.0/db_resnet50-adcafc63.zip&src=0"],
|
|
509
|
-
categories={
|
|
499
|
+
categories={1: LayoutType.WORD},
|
|
510
500
|
dl_library="TF",
|
|
511
501
|
model_wrapper="DoctrTextlineDetector",
|
|
512
502
|
architecture="db_resnet50",
|
|
@@ -535,195 +525,386 @@ class ModelCatalog:
|
|
|
535
525
|
model_wrapper="DoctrTextRecognizer",
|
|
536
526
|
architecture="crnn_vgg16_bn",
|
|
537
527
|
),
|
|
528
|
+
"FacebookAI/xlm-roberta-base": ModelProfile(
|
|
529
|
+
name="FacebookAI/xlm-roberta-base/pytorch_model.bin",
|
|
530
|
+
description="XLM-RoBERTa model pre-trained on 2.5TB of filtered CommonCrawl data containing 100 languages."
|
|
531
|
+
" It was introduced in the paper Unsupervised Cross-lingual Representation Learning at Scale"
|
|
532
|
+
" by Conneau et al. and first released in this repository.",
|
|
533
|
+
size=[1115590446],
|
|
534
|
+
tp_model=False,
|
|
535
|
+
config="FacebookAI/xlm-roberta-base/config.json",
|
|
536
|
+
hf_repo_id="FacebookAI/xlm-roberta-base",
|
|
537
|
+
hf_model_name="pytorch_model.bin",
|
|
538
|
+
hf_config_file=["config.json"],
|
|
539
|
+
dl_library="PT",
|
|
540
|
+
),
|
|
538
541
|
"fasttext/lid.176.bin": ModelProfile(
|
|
539
542
|
name="fasttext/lid.176.bin",
|
|
540
543
|
description="Fasttext language detection model",
|
|
541
544
|
size=[131266198],
|
|
542
545
|
urls=["https://dl.fbaipublicfiles.com/fasttext/supervised-models/lid.176.bin"],
|
|
543
546
|
categories={
|
|
544
|
-
|
|
545
|
-
|
|
546
|
-
|
|
547
|
-
|
|
548
|
-
|
|
549
|
-
|
|
550
|
-
|
|
551
|
-
|
|
552
|
-
|
|
553
|
-
|
|
554
|
-
|
|
555
|
-
|
|
556
|
-
|
|
557
|
-
|
|
558
|
-
|
|
559
|
-
|
|
560
|
-
|
|
561
|
-
|
|
562
|
-
|
|
563
|
-
|
|
564
|
-
|
|
565
|
-
|
|
566
|
-
|
|
567
|
-
|
|
568
|
-
|
|
569
|
-
|
|
570
|
-
|
|
571
|
-
|
|
572
|
-
|
|
573
|
-
|
|
574
|
-
|
|
575
|
-
|
|
576
|
-
|
|
577
|
-
|
|
578
|
-
|
|
579
|
-
|
|
580
|
-
|
|
581
|
-
|
|
582
|
-
|
|
583
|
-
|
|
584
|
-
|
|
585
|
-
|
|
586
|
-
|
|
587
|
-
|
|
588
|
-
|
|
589
|
-
|
|
590
|
-
|
|
591
|
-
|
|
592
|
-
|
|
593
|
-
|
|
594
|
-
|
|
595
|
-
|
|
596
|
-
|
|
597
|
-
|
|
598
|
-
|
|
599
|
-
|
|
600
|
-
|
|
601
|
-
|
|
602
|
-
|
|
603
|
-
|
|
604
|
-
|
|
605
|
-
|
|
606
|
-
|
|
607
|
-
|
|
608
|
-
|
|
609
|
-
|
|
610
|
-
|
|
611
|
-
|
|
612
|
-
|
|
613
|
-
|
|
614
|
-
|
|
615
|
-
|
|
616
|
-
|
|
617
|
-
|
|
618
|
-
|
|
619
|
-
|
|
620
|
-
|
|
621
|
-
|
|
622
|
-
|
|
623
|
-
|
|
624
|
-
|
|
625
|
-
|
|
626
|
-
|
|
627
|
-
|
|
628
|
-
|
|
629
|
-
|
|
630
|
-
|
|
631
|
-
|
|
632
|
-
|
|
633
|
-
|
|
634
|
-
|
|
635
|
-
|
|
636
|
-
|
|
637
|
-
|
|
638
|
-
|
|
639
|
-
|
|
640
|
-
|
|
641
|
-
|
|
642
|
-
|
|
643
|
-
|
|
644
|
-
|
|
645
|
-
|
|
646
|
-
|
|
647
|
-
|
|
648
|
-
|
|
649
|
-
|
|
650
|
-
|
|
651
|
-
|
|
652
|
-
|
|
653
|
-
|
|
654
|
-
|
|
655
|
-
|
|
656
|
-
|
|
657
|
-
|
|
658
|
-
|
|
659
|
-
|
|
660
|
-
|
|
661
|
-
|
|
662
|
-
|
|
663
|
-
|
|
664
|
-
|
|
665
|
-
|
|
666
|
-
|
|
667
|
-
|
|
668
|
-
|
|
669
|
-
|
|
670
|
-
|
|
671
|
-
|
|
672
|
-
|
|
673
|
-
|
|
674
|
-
|
|
675
|
-
|
|
676
|
-
|
|
677
|
-
|
|
678
|
-
|
|
679
|
-
|
|
680
|
-
|
|
681
|
-
|
|
682
|
-
|
|
683
|
-
|
|
684
|
-
|
|
685
|
-
|
|
686
|
-
|
|
687
|
-
|
|
688
|
-
|
|
689
|
-
|
|
690
|
-
|
|
691
|
-
|
|
692
|
-
|
|
693
|
-
|
|
694
|
-
|
|
695
|
-
|
|
696
|
-
|
|
697
|
-
|
|
698
|
-
|
|
699
|
-
|
|
700
|
-
|
|
701
|
-
|
|
702
|
-
|
|
703
|
-
|
|
704
|
-
|
|
705
|
-
|
|
706
|
-
|
|
707
|
-
|
|
708
|
-
|
|
709
|
-
|
|
710
|
-
|
|
711
|
-
|
|
712
|
-
|
|
713
|
-
|
|
714
|
-
|
|
715
|
-
|
|
716
|
-
|
|
717
|
-
|
|
718
|
-
|
|
719
|
-
|
|
547
|
+
1: Languages.ENGLISH,
|
|
548
|
+
2: Languages.RUSSIAN,
|
|
549
|
+
3: Languages.GERMAN,
|
|
550
|
+
4: Languages.FRENCH,
|
|
551
|
+
5: Languages.ITALIAN,
|
|
552
|
+
6: Languages.JAPANESE,
|
|
553
|
+
7: Languages.SPANISH,
|
|
554
|
+
8: Languages.CEBUANO,
|
|
555
|
+
9: Languages.TURKISH,
|
|
556
|
+
10: Languages.PORTUGUESE,
|
|
557
|
+
11: Languages.UKRAINIAN,
|
|
558
|
+
12: Languages.ESPERANTO,
|
|
559
|
+
13: Languages.POLISH,
|
|
560
|
+
14: Languages.SWEDISH,
|
|
561
|
+
15: Languages.DUTCH,
|
|
562
|
+
16: Languages.HEBREW,
|
|
563
|
+
17: Languages.CHINESE,
|
|
564
|
+
18: Languages.HUNGARIAN,
|
|
565
|
+
19: Languages.ARABIC,
|
|
566
|
+
20: Languages.CATALAN,
|
|
567
|
+
21: Languages.FINNISH,
|
|
568
|
+
22: Languages.CZECH,
|
|
569
|
+
23: Languages.PERSIAN,
|
|
570
|
+
24: Languages.SERBIAN,
|
|
571
|
+
25: Languages.GREEK,
|
|
572
|
+
26: Languages.VIETNAMESE,
|
|
573
|
+
27: Languages.BULGARIAN,
|
|
574
|
+
28: Languages.KOREAN,
|
|
575
|
+
29: Languages.NORWEGIAN,
|
|
576
|
+
30: Languages.MACEDONIAN,
|
|
577
|
+
31: Languages.ROMANIAN,
|
|
578
|
+
32: Languages.INDONESIAN,
|
|
579
|
+
33: Languages.THAI,
|
|
580
|
+
34: Languages.ARMENIAN,
|
|
581
|
+
35: Languages.DANISH,
|
|
582
|
+
36: Languages.TAMIL,
|
|
583
|
+
37: Languages.HINDI,
|
|
584
|
+
38: Languages.CROATIAN,
|
|
585
|
+
39: Languages.NOT_DEFINED,
|
|
586
|
+
40: Languages.BELARUSIAN,
|
|
587
|
+
41: Languages.GEORGIAN,
|
|
588
|
+
42: Languages.TELUGU,
|
|
589
|
+
43: Languages.KAZAKH,
|
|
590
|
+
44: Languages.WARAY,
|
|
591
|
+
45: Languages.LITHUANIAN,
|
|
592
|
+
46: Languages.SCOTTISH,
|
|
593
|
+
47: Languages.SLOVAK,
|
|
594
|
+
48: Languages.BENIN,
|
|
595
|
+
49: Languages.BASQUE,
|
|
596
|
+
50: Languages.SLOVENIAN,
|
|
597
|
+
51: Languages.NOT_DEFINED,
|
|
598
|
+
52: Languages.MALAYALAM,
|
|
599
|
+
53: Languages.MARATHI,
|
|
600
|
+
54: Languages.ESTONIAN,
|
|
601
|
+
55: Languages.AZERBAIJANI,
|
|
602
|
+
56: Languages.NOT_DEFINED,
|
|
603
|
+
57: Languages.ALBANIAN,
|
|
604
|
+
58: Languages.LATIN,
|
|
605
|
+
59: Languages.BOSNIAN,
|
|
606
|
+
60: Languages.NORWEGIAN_NOVOSIBIRSK,
|
|
607
|
+
61: Languages.URDU,
|
|
608
|
+
62: Languages.NOT_DEFINED,
|
|
609
|
+
63: Languages.NOT_DEFINED,
|
|
610
|
+
64: Languages.NOT_DEFINED,
|
|
611
|
+
65: Languages.NOT_DEFINED,
|
|
612
|
+
66: Languages.NOT_DEFINED,
|
|
613
|
+
67: Languages.NOT_DEFINED,
|
|
614
|
+
68: Languages.NOT_DEFINED,
|
|
615
|
+
69: Languages.NOT_DEFINED,
|
|
616
|
+
70: Languages.NOT_DEFINED,
|
|
617
|
+
71: Languages.NOT_DEFINED,
|
|
618
|
+
72: Languages.NOT_DEFINED,
|
|
619
|
+
73: Languages.NOT_DEFINED,
|
|
620
|
+
74: Languages.NOT_DEFINED,
|
|
621
|
+
75: Languages.NOT_DEFINED,
|
|
622
|
+
76: Languages.NOT_DEFINED,
|
|
623
|
+
77: Languages.NOT_DEFINED,
|
|
624
|
+
78: Languages.NOT_DEFINED,
|
|
625
|
+
79: Languages.NOT_DEFINED,
|
|
626
|
+
80: Languages.NOT_DEFINED,
|
|
627
|
+
81: Languages.NOT_DEFINED,
|
|
628
|
+
82: Languages.NOT_DEFINED,
|
|
629
|
+
83: Languages.NOT_DEFINED,
|
|
630
|
+
84: Languages.NOT_DEFINED,
|
|
631
|
+
85: Languages.NOT_DEFINED,
|
|
632
|
+
86: Languages.NOT_DEFINED,
|
|
633
|
+
87: Languages.NOT_DEFINED,
|
|
634
|
+
88: Languages.NOT_DEFINED,
|
|
635
|
+
89: Languages.NOT_DEFINED,
|
|
636
|
+
90: Languages.NOT_DEFINED,
|
|
637
|
+
91: Languages.NOT_DEFINED,
|
|
638
|
+
92: Languages.NOT_DEFINED,
|
|
639
|
+
93: Languages.NOT_DEFINED,
|
|
640
|
+
94: Languages.NOT_DEFINED,
|
|
641
|
+
95: Languages.NOT_DEFINED,
|
|
642
|
+
96: Languages.NOT_DEFINED,
|
|
643
|
+
97: Languages.NOT_DEFINED,
|
|
644
|
+
98: Languages.NOT_DEFINED,
|
|
645
|
+
99: Languages.NOT_DEFINED,
|
|
646
|
+
100: Languages.NOT_DEFINED,
|
|
647
|
+
101: Languages.NOT_DEFINED,
|
|
648
|
+
102: Languages.NOT_DEFINED,
|
|
649
|
+
103: Languages.NOT_DEFINED,
|
|
650
|
+
104: Languages.NOT_DEFINED,
|
|
651
|
+
105: Languages.NOT_DEFINED,
|
|
652
|
+
106: Languages.NOT_DEFINED,
|
|
653
|
+
107: Languages.NOT_DEFINED,
|
|
654
|
+
108: Languages.NOT_DEFINED,
|
|
655
|
+
109: Languages.NOT_DEFINED,
|
|
656
|
+
110: Languages.NOT_DEFINED,
|
|
657
|
+
111: Languages.NOT_DEFINED,
|
|
658
|
+
112: Languages.NOT_DEFINED,
|
|
659
|
+
113: Languages.NOT_DEFINED,
|
|
660
|
+
114: Languages.NOT_DEFINED,
|
|
661
|
+
115: Languages.NOT_DEFINED,
|
|
662
|
+
116: Languages.NOT_DEFINED,
|
|
663
|
+
117: Languages.NOT_DEFINED,
|
|
664
|
+
118: Languages.NOT_DEFINED,
|
|
665
|
+
119: Languages.NOT_DEFINED,
|
|
666
|
+
120: Languages.NOT_DEFINED,
|
|
667
|
+
121: Languages.NOT_DEFINED,
|
|
668
|
+
122: Languages.NOT_DEFINED,
|
|
669
|
+
123: Languages.NOT_DEFINED,
|
|
670
|
+
124: Languages.NOT_DEFINED,
|
|
671
|
+
125: Languages.NOT_DEFINED,
|
|
672
|
+
126: Languages.NOT_DEFINED,
|
|
673
|
+
127: Languages.NOT_DEFINED,
|
|
674
|
+
128: Languages.NOT_DEFINED,
|
|
675
|
+
129: Languages.NOT_DEFINED,
|
|
676
|
+
130: Languages.NOT_DEFINED,
|
|
677
|
+
131: Languages.NOT_DEFINED,
|
|
678
|
+
132: Languages.NOT_DEFINED,
|
|
679
|
+
133: Languages.NOT_DEFINED,
|
|
680
|
+
134: Languages.NOT_DEFINED,
|
|
681
|
+
135: Languages.NOT_DEFINED,
|
|
682
|
+
136: Languages.NOT_DEFINED,
|
|
683
|
+
137: Languages.NOT_DEFINED,
|
|
684
|
+
138: Languages.NOT_DEFINED,
|
|
685
|
+
139: Languages.NOT_DEFINED,
|
|
686
|
+
140: Languages.NOT_DEFINED,
|
|
687
|
+
141: Languages.NOT_DEFINED,
|
|
688
|
+
142: Languages.NOT_DEFINED,
|
|
689
|
+
143: Languages.NOT_DEFINED,
|
|
690
|
+
144: Languages.NOT_DEFINED,
|
|
691
|
+
145: Languages.NOT_DEFINED,
|
|
692
|
+
146: Languages.NOT_DEFINED,
|
|
693
|
+
147: Languages.NOT_DEFINED,
|
|
694
|
+
148: Languages.NOT_DEFINED,
|
|
695
|
+
149: Languages.NOT_DEFINED,
|
|
696
|
+
150: Languages.NOT_DEFINED,
|
|
697
|
+
151: Languages.NOT_DEFINED,
|
|
698
|
+
152: Languages.NOT_DEFINED,
|
|
699
|
+
153: Languages.NOT_DEFINED,
|
|
700
|
+
154: Languages.NOT_DEFINED,
|
|
701
|
+
155: Languages.NOT_DEFINED,
|
|
702
|
+
156: Languages.NOT_DEFINED,
|
|
703
|
+
157: Languages.NOT_DEFINED,
|
|
704
|
+
158: Languages.NOT_DEFINED,
|
|
705
|
+
159: Languages.NOT_DEFINED,
|
|
706
|
+
160: Languages.NOT_DEFINED,
|
|
707
|
+
161: Languages.NOT_DEFINED,
|
|
708
|
+
162: Languages.NOT_DEFINED,
|
|
709
|
+
163: Languages.NOT_DEFINED,
|
|
710
|
+
164: Languages.NOT_DEFINED,
|
|
711
|
+
165: Languages.NOT_DEFINED,
|
|
712
|
+
166: Languages.NOT_DEFINED,
|
|
713
|
+
167: Languages.NOT_DEFINED,
|
|
714
|
+
168: Languages.NOT_DEFINED,
|
|
715
|
+
169: Languages.NOT_DEFINED,
|
|
716
|
+
170: Languages.NOT_DEFINED,
|
|
717
|
+
171: Languages.NOT_DEFINED,
|
|
718
|
+
172: Languages.NOT_DEFINED,
|
|
719
|
+
173: Languages.NOT_DEFINED,
|
|
720
|
+
174: Languages.NOT_DEFINED,
|
|
721
|
+
175: Languages.NOT_DEFINED,
|
|
722
|
+
176: Languages.NOT_DEFINED,
|
|
723
|
+
},
|
|
724
|
+
categories_orig={
|
|
725
|
+
"__label__en": Languages.ENGLISH,
|
|
726
|
+
"__label__ru": Languages.RUSSIAN,
|
|
727
|
+
"__label__de": Languages.GERMAN,
|
|
728
|
+
"__label__fr": Languages.FRENCH,
|
|
729
|
+
"__label__it": Languages.ITALIAN,
|
|
730
|
+
"__label__ja": Languages.JAPANESE,
|
|
731
|
+
"__label__es": Languages.SPANISH,
|
|
732
|
+
"__label__ceb": Languages.CEBUANO,
|
|
733
|
+
"__label__tr": Languages.TURKISH,
|
|
734
|
+
"__label__pt": Languages.PORTUGUESE,
|
|
735
|
+
"__label__uk": Languages.UKRAINIAN,
|
|
736
|
+
"__label__eo": Languages.ESPERANTO,
|
|
737
|
+
"__label__pl": Languages.POLISH,
|
|
738
|
+
"__label__sv": Languages.SWEDISH,
|
|
739
|
+
"__label__nl": Languages.DUTCH,
|
|
740
|
+
"__label__he": Languages.HEBREW,
|
|
741
|
+
"__label__zh": Languages.CHINESE,
|
|
742
|
+
"__label__hu": Languages.HUNGARIAN,
|
|
743
|
+
"__label__ar": Languages.ARABIC,
|
|
744
|
+
"__label__ca": Languages.CATALAN,
|
|
745
|
+
"__label__fi": Languages.FINNISH,
|
|
746
|
+
"__label__cs": Languages.CZECH,
|
|
747
|
+
"__label__fa": Languages.PERSIAN,
|
|
748
|
+
"__label__sr": Languages.SERBIAN,
|
|
749
|
+
"__label__el": Languages.GREEK,
|
|
750
|
+
"__label__vi": Languages.VIETNAMESE,
|
|
751
|
+
"__label__bg": Languages.BULGARIAN,
|
|
752
|
+
"__label__ko": Languages.KOREAN,
|
|
753
|
+
"__label__no": Languages.NORWEGIAN,
|
|
754
|
+
"__label__mk": Languages.MACEDONIAN,
|
|
755
|
+
"__label__ro": Languages.ROMANIAN,
|
|
756
|
+
"__label__id": Languages.INDONESIAN,
|
|
757
|
+
"__label__th": Languages.THAI,
|
|
758
|
+
"__label__hy": Languages.ARMENIAN,
|
|
759
|
+
"__label__da": Languages.DANISH,
|
|
760
|
+
"__label__ta": Languages.TAMIL,
|
|
761
|
+
"__label__hi": Languages.HINDI,
|
|
762
|
+
"__label__hr": Languages.CROATIAN,
|
|
763
|
+
"__label__sh": Languages.NOT_DEFINED,
|
|
764
|
+
"__label__be": Languages.BELARUSIAN,
|
|
765
|
+
"__label__ka": Languages.GEORGIAN,
|
|
766
|
+
"__label__te": Languages.TELUGU,
|
|
767
|
+
"__label__kk": Languages.KAZAKH,
|
|
768
|
+
"__label__war": Languages.WARAY,
|
|
769
|
+
"__label__lt": Languages.LITHUANIAN,
|
|
770
|
+
"__label__gl": Languages.SCOTTISH,
|
|
771
|
+
"__label__sk": Languages.SLOVAK,
|
|
772
|
+
"__label__bn": Languages.BENIN,
|
|
773
|
+
"__label__eu": Languages.BASQUE,
|
|
774
|
+
"__label__sl": Languages.SLOVENIAN,
|
|
775
|
+
"__label__kn": Languages.NOT_DEFINED,
|
|
776
|
+
"__label__ml": Languages.MALAYALAM,
|
|
777
|
+
"__label__mr": Languages.MARATHI,
|
|
778
|
+
"__label__et": Languages.ESTONIAN,
|
|
779
|
+
"__label__az": Languages.AZERBAIJANI,
|
|
780
|
+
"__label__ms": Languages.NOT_DEFINED,
|
|
781
|
+
"__label__sq": Languages.ALBANIAN,
|
|
782
|
+
"__label__la": Languages.LATIN,
|
|
783
|
+
"__label__bs": Languages.BOSNIAN,
|
|
784
|
+
"__label__nn": Languages.NORWEGIAN_NOVOSIBIRSK,
|
|
785
|
+
"__label__ur": Languages.URDU,
|
|
786
|
+
"__label__lv": Languages.NOT_DEFINED,
|
|
787
|
+
"__label__my": Languages.NOT_DEFINED,
|
|
788
|
+
"__label__tt": Languages.NOT_DEFINED,
|
|
789
|
+
"__label__af": Languages.NOT_DEFINED,
|
|
790
|
+
"__label__oc": Languages.NOT_DEFINED,
|
|
791
|
+
"__label__nds": Languages.NOT_DEFINED,
|
|
792
|
+
"__label__ky": Languages.NOT_DEFINED,
|
|
793
|
+
"__label__ast": Languages.NOT_DEFINED,
|
|
794
|
+
"__label__tl": Languages.NOT_DEFINED,
|
|
795
|
+
"__label__is": Languages.NOT_DEFINED,
|
|
796
|
+
"__label__ia": Languages.NOT_DEFINED,
|
|
797
|
+
"__label__si": Languages.NOT_DEFINED,
|
|
798
|
+
"__label__gu": Languages.NOT_DEFINED,
|
|
799
|
+
"__label__km": Languages.NOT_DEFINED,
|
|
800
|
+
"__label__br": Languages.NOT_DEFINED,
|
|
801
|
+
"__label__ba": Languages.NOT_DEFINED,
|
|
802
|
+
"__label__uz": Languages.NOT_DEFINED,
|
|
803
|
+
"__label__bo": Languages.NOT_DEFINED,
|
|
804
|
+
"__label__pa": Languages.NOT_DEFINED,
|
|
805
|
+
"__label__vo": Languages.NOT_DEFINED,
|
|
806
|
+
"__label__als": Languages.NOT_DEFINED,
|
|
807
|
+
"__label__ne": Languages.NOT_DEFINED,
|
|
808
|
+
"__label__cy": Languages.NOT_DEFINED,
|
|
809
|
+
"__label__jbo": Languages.NOT_DEFINED,
|
|
810
|
+
"__label__fy": Languages.NOT_DEFINED,
|
|
811
|
+
"__label__mn": Languages.NOT_DEFINED,
|
|
812
|
+
"__label__lb": Languages.NOT_DEFINED,
|
|
813
|
+
"__label__ce": Languages.NOT_DEFINED,
|
|
814
|
+
"__label__ug": Languages.NOT_DEFINED,
|
|
815
|
+
"__label__tg": Languages.NOT_DEFINED,
|
|
816
|
+
"__label__sco": Languages.NOT_DEFINED,
|
|
817
|
+
"__label__sa": Languages.NOT_DEFINED,
|
|
818
|
+
"__label__cv": Languages.NOT_DEFINED,
|
|
819
|
+
"__label__jv": Languages.NOT_DEFINED,
|
|
820
|
+
"__label__min": Languages.NOT_DEFINED,
|
|
821
|
+
"__label__io": Languages.NOT_DEFINED,
|
|
822
|
+
"__label__or": Languages.NOT_DEFINED,
|
|
823
|
+
"__label__as": Languages.NOT_DEFINED,
|
|
824
|
+
"__label__new": Languages.NOT_DEFINED,
|
|
825
|
+
"__label__ga": Languages.NOT_DEFINED,
|
|
826
|
+
"__label__mg": Languages.NOT_DEFINED,
|
|
827
|
+
"__label__an": Languages.NOT_DEFINED,
|
|
828
|
+
"__label__ckb": Languages.NOT_DEFINED,
|
|
829
|
+
"__label__sw": Languages.NOT_DEFINED,
|
|
830
|
+
"__label__bar": Languages.NOT_DEFINED,
|
|
831
|
+
"__label__lmo": Languages.NOT_DEFINED,
|
|
832
|
+
"__label__yi": Languages.NOT_DEFINED,
|
|
833
|
+
"__label__arz": Languages.NOT_DEFINED,
|
|
834
|
+
"__label__mhr": Languages.NOT_DEFINED,
|
|
835
|
+
"__label__azb": Languages.NOT_DEFINED,
|
|
836
|
+
"__label__sah": Languages.NOT_DEFINED,
|
|
837
|
+
"__label__pnb": Languages.NOT_DEFINED,
|
|
838
|
+
"__label__su": Languages.NOT_DEFINED,
|
|
839
|
+
"__label__bpy": Languages.NOT_DEFINED,
|
|
840
|
+
"__label__pms": Languages.NOT_DEFINED,
|
|
841
|
+
"__label__ilo": Languages.NOT_DEFINED,
|
|
842
|
+
"__label__wuu": Languages.NOT_DEFINED,
|
|
843
|
+
"__label__ku": Languages.NOT_DEFINED,
|
|
844
|
+
"__label__ps": Languages.NOT_DEFINED,
|
|
845
|
+
"__label__ie": Languages.NOT_DEFINED,
|
|
846
|
+
"__label__xmf": Languages.NOT_DEFINED,
|
|
847
|
+
"__label__yue": Languages.NOT_DEFINED,
|
|
848
|
+
"__label__gom": Languages.NOT_DEFINED,
|
|
849
|
+
"__label__li": Languages.NOT_DEFINED,
|
|
850
|
+
"__label__mwl": Languages.NOT_DEFINED,
|
|
851
|
+
"__label__kw": Languages.NOT_DEFINED,
|
|
852
|
+
"__label__sd": Languages.NOT_DEFINED,
|
|
853
|
+
"__label__hsb": Languages.NOT_DEFINED,
|
|
854
|
+
"__label__scn": Languages.NOT_DEFINED,
|
|
855
|
+
"__label__gd": Languages.NOT_DEFINED,
|
|
856
|
+
"__label__pam": Languages.NOT_DEFINED,
|
|
857
|
+
"__label__bh": Languages.NOT_DEFINED,
|
|
858
|
+
"__label__mai": Languages.NOT_DEFINED,
|
|
859
|
+
"__label__vec": Languages.NOT_DEFINED,
|
|
860
|
+
"__label__mt": Languages.NOT_DEFINED,
|
|
861
|
+
"__label__dv": Languages.NOT_DEFINED,
|
|
862
|
+
"__label__wa": Languages.NOT_DEFINED,
|
|
863
|
+
"__label__mzn": Languages.NOT_DEFINED,
|
|
864
|
+
"__label__am": Languages.NOT_DEFINED,
|
|
865
|
+
"__label__qu": Languages.NOT_DEFINED,
|
|
866
|
+
"__label__eml": Languages.NOT_DEFINED,
|
|
867
|
+
"__label__cbk": Languages.NOT_DEFINED,
|
|
868
|
+
"__label__tk": Languages.NOT_DEFINED,
|
|
869
|
+
"__label__rm": Languages.NOT_DEFINED,
|
|
870
|
+
"__label__os": Languages.NOT_DEFINED,
|
|
871
|
+
"__label__vls": Languages.NOT_DEFINED,
|
|
872
|
+
"__label__yo": Languages.NOT_DEFINED,
|
|
873
|
+
"__label__lo": Languages.NOT_DEFINED,
|
|
874
|
+
"__label__lez": Languages.NOT_DEFINED,
|
|
875
|
+
"__label__so": Languages.NOT_DEFINED,
|
|
876
|
+
"__label__myv": Languages.NOT_DEFINED,
|
|
877
|
+
"__label__diq": Languages.NOT_DEFINED,
|
|
878
|
+
"__label__mrj": Languages.NOT_DEFINED,
|
|
879
|
+
"__label__dsb": Languages.NOT_DEFINED,
|
|
880
|
+
"__label__frr": Languages.NOT_DEFINED,
|
|
881
|
+
"__label__ht": Languages.NOT_DEFINED,
|
|
882
|
+
"__label__gn": Languages.NOT_DEFINED,
|
|
883
|
+
"__label__bxr": Languages.NOT_DEFINED,
|
|
884
|
+
"__label__kv": Languages.NOT_DEFINED,
|
|
885
|
+
"__label__sc": Languages.NOT_DEFINED,
|
|
886
|
+
"__label__nah": Languages.NOT_DEFINED,
|
|
887
|
+
"__label__krc": Languages.NOT_DEFINED,
|
|
888
|
+
"__label__bcl": Languages.NOT_DEFINED,
|
|
889
|
+
"__label__nap": Languages.NOT_DEFINED,
|
|
890
|
+
"__label__gv": Languages.NOT_DEFINED,
|
|
891
|
+
"__label__av": Languages.NOT_DEFINED,
|
|
892
|
+
"__label__rue": Languages.NOT_DEFINED,
|
|
893
|
+
"__label__xal": Languages.NOT_DEFINED,
|
|
894
|
+
"__label__pfl": Languages.NOT_DEFINED,
|
|
895
|
+
"__label__dty": Languages.NOT_DEFINED,
|
|
896
|
+
"__label__hif": Languages.NOT_DEFINED,
|
|
897
|
+
"__label__co": Languages.NOT_DEFINED,
|
|
898
|
+
"__label__lrc": Languages.NOT_DEFINED,
|
|
899
|
+
"__label__vep": Languages.NOT_DEFINED,
|
|
900
|
+
"__label__tyv": Languages.NOT_DEFINED,
|
|
720
901
|
},
|
|
721
902
|
model_wrapper="FasttextLangDetector",
|
|
722
903
|
),
|
|
723
904
|
}
|
|
724
905
|
|
|
725
906
|
@staticmethod
|
|
726
|
-
def get_full_path_weights(name:
|
|
907
|
+
def get_full_path_weights(name: PathLikeOrStr) -> PathLikeOrStr:
|
|
727
908
|
"""
|
|
728
909
|
Returns the absolute path of weights.
|
|
729
910
|
|
|
@@ -734,7 +915,7 @@ class ModelCatalog:
|
|
|
734
915
|
:return: absolute weight path
|
|
735
916
|
"""
|
|
736
917
|
try:
|
|
737
|
-
profile = ModelCatalog.get_profile(name)
|
|
918
|
+
profile = ModelCatalog.get_profile(os.fspath(name))
|
|
738
919
|
except KeyError:
|
|
739
920
|
logger.info(
|
|
740
921
|
LoggingRecord(
|
|
@@ -754,7 +935,7 @@ class ModelCatalog:
|
|
|
754
935
|
return os.path.join(get_weights_dir_path(), name)
|
|
755
936
|
|
|
756
937
|
@staticmethod
|
|
757
|
-
def get_full_path_configs(name:
|
|
938
|
+
def get_full_path_configs(name: PathLikeOrStr) -> PathLikeOrStr:
|
|
758
939
|
"""
|
|
759
940
|
Return the absolute path of configs for some given weights. Alternatively, pass last a path to a config file
|
|
760
941
|
(without the base path to the cache config directory).
|
|
@@ -766,7 +947,7 @@ class ModelCatalog:
|
|
|
766
947
|
:return: absolute path to the config
|
|
767
948
|
"""
|
|
768
949
|
try:
|
|
769
|
-
profile = ModelCatalog.get_profile(name)
|
|
950
|
+
profile = ModelCatalog.get_profile(os.fspath(name))
|
|
770
951
|
except KeyError:
|
|
771
952
|
logger.info(
|
|
772
953
|
LoggingRecord(
|
|
@@ -780,7 +961,7 @@ class ModelCatalog:
|
|
|
780
961
|
return os.path.join(get_configs_dir_path(), name)
|
|
781
962
|
|
|
782
963
|
@staticmethod
|
|
783
|
-
def get_full_path_preprocessor_configs(name: str) ->
|
|
964
|
+
def get_full_path_preprocessor_configs(name: Union[str]) -> PathLikeOrStr:
|
|
784
965
|
"""
|
|
785
966
|
Return the absolute path of preprocessor configs for some given weights. Preprocessor are occasionally provided
|
|
786
967
|
by the transformer library.
|
|
@@ -804,21 +985,21 @@ class ModelCatalog:
|
|
|
804
985
|
return os.path.join(get_configs_dir_path(), name)
|
|
805
986
|
|
|
806
987
|
@staticmethod
|
|
807
|
-
def get_model_list() ->
|
|
988
|
+
def get_model_list() -> list[PathLikeOrStr]:
|
|
808
989
|
"""
|
|
809
990
|
Returns a list of absolute paths of registered models.
|
|
810
991
|
"""
|
|
811
992
|
return [os.path.join(get_weights_dir_path(), profile.name) for profile in ModelCatalog.CATALOG.values()]
|
|
812
993
|
|
|
813
994
|
@staticmethod
|
|
814
|
-
def get_profile_list() ->
|
|
995
|
+
def get_profile_list() -> list[str]:
|
|
815
996
|
"""
|
|
816
997
|
Returns a list profile keys.
|
|
817
998
|
"""
|
|
818
999
|
return list(ModelCatalog.CATALOG.keys())
|
|
819
1000
|
|
|
820
1001
|
@staticmethod
|
|
821
|
-
def is_registered(path_weights:
|
|
1002
|
+
def is_registered(path_weights: PathLikeOrStr) -> bool:
|
|
822
1003
|
"""
|
|
823
1004
|
Checks if some weights belong to a registered model
|
|
824
1005
|
|
|
@@ -842,8 +1023,8 @@ class ModelCatalog:
|
|
|
842
1023
|
|
|
843
1024
|
profile = ModelCatalog.CATALOG.get(name)
|
|
844
1025
|
if profile is not None:
|
|
845
|
-
return
|
|
846
|
-
raise KeyError("Model Profile does not exist. Please make sure the model is registered")
|
|
1026
|
+
return profile
|
|
1027
|
+
raise KeyError(f"Model Profile {name} does not exist. Please make sure the model is registered")
|
|
847
1028
|
|
|
848
1029
|
@staticmethod
|
|
849
1030
|
def register(name: str, profile: ModelProfile) -> None:
|
|
@@ -859,7 +1040,7 @@ class ModelCatalog:
|
|
|
859
1040
|
ModelCatalog.CATALOG[name] = profile
|
|
860
1041
|
|
|
861
1042
|
@staticmethod
|
|
862
|
-
def load_profiles_from_file(path: Optional[
|
|
1043
|
+
def load_profiles_from_file(path: Optional[PathLikeOrStr] = None) -> None:
|
|
863
1044
|
"""
|
|
864
1045
|
Load model profiles from a jsonl file and extend `CATALOG` with the new profiles.
|
|
865
1046
|
|
|
@@ -870,10 +1051,11 @@ class ModelCatalog:
|
|
|
870
1051
|
with jsonlines.open(path) as reader:
|
|
871
1052
|
for obj in reader:
|
|
872
1053
|
if not obj["name"] in ModelCatalog.CATALOG:
|
|
1054
|
+
obj["categories"] = {int(key): get_type(val) for key, val in obj["categories"].items()}
|
|
873
1055
|
ModelCatalog.register(obj["name"], ModelProfile(**obj))
|
|
874
1056
|
|
|
875
1057
|
@staticmethod
|
|
876
|
-
def save_profiles_to_file(target_path:
|
|
1058
|
+
def save_profiles_to_file(target_path: PathLikeOrStr) -> None:
|
|
877
1059
|
"""
|
|
878
1060
|
Save model profiles to a jsonl file.
|
|
879
1061
|
|
|
@@ -889,7 +1071,7 @@ class ModelCatalog:
|
|
|
889
1071
|
ModelCatalog.load_profiles_from_file(os.environ.get("MODEL_CATALOG", None))
|
|
890
1072
|
|
|
891
1073
|
|
|
892
|
-
def get_tp_weight_names(name: str) ->
|
|
1074
|
+
def get_tp_weight_names(name: str) -> list[str]:
|
|
893
1075
|
"""
|
|
894
1076
|
Given a path to some model weights it will return all file names according to TP naming convention
|
|
895
1077
|
|
|
@@ -915,7 +1097,7 @@ def print_model_infos(add_description: bool = True, add_config: bool = True, add
|
|
|
915
1097
|
num_columns = min(6, len(profiles))
|
|
916
1098
|
infos = []
|
|
917
1099
|
for profile in profiles:
|
|
918
|
-
tbl_input:
|
|
1100
|
+
tbl_input: list[Union[Mapping[int, ObjectTypes], str]] = [profile.name]
|
|
919
1101
|
if add_description:
|
|
920
1102
|
tbl_input.append(profile.description)
|
|
921
1103
|
if add_config:
|
|
@@ -950,7 +1132,7 @@ class ModelDownloadManager:
|
|
|
950
1132
|
"""
|
|
951
1133
|
|
|
952
1134
|
@staticmethod
|
|
953
|
-
def maybe_download_weights_and_configs(name: str) ->
|
|
1135
|
+
def maybe_download_weights_and_configs(name: str) -> PathLikeOrStr:
|
|
954
1136
|
"""
|
|
955
1137
|
Check if some model is registered. If yes, it will check if their weights
|
|
956
1138
|
must be downloaded. Only weights that have not the same expected size will be downloaded again.
|
|
@@ -960,7 +1142,7 @@ class ModelDownloadManager:
|
|
|
960
1142
|
"""
|
|
961
1143
|
|
|
962
1144
|
absolute_path_weights = ModelCatalog.get_full_path_weights(name)
|
|
963
|
-
file_names:
|
|
1145
|
+
file_names: list[str] = []
|
|
964
1146
|
if ModelCatalog.is_registered(name):
|
|
965
1147
|
profile = ModelCatalog.get_profile(name)
|
|
966
1148
|
# there is nothing to download if hf_repo_id or urls is not provided
|
|
@@ -980,9 +1162,11 @@ class ModelDownloadManager:
|
|
|
980
1162
|
else:
|
|
981
1163
|
file_names.append(model_name)
|
|
982
1164
|
if profile.hf_repo_id:
|
|
983
|
-
|
|
1165
|
+
if not os.path.isfile(absolute_path_weights):
|
|
1166
|
+
ModelDownloadManager.load_model_from_hf_hub(profile, absolute_path_weights, file_names)
|
|
984
1167
|
absolute_path_configs = ModelCatalog.get_full_path_configs(name)
|
|
985
|
-
|
|
1168
|
+
if not os.path.isfile(absolute_path_configs):
|
|
1169
|
+
ModelDownloadManager.load_configs_from_hf_hub(profile, absolute_path_configs)
|
|
986
1170
|
else:
|
|
987
1171
|
ModelDownloadManager._load_from_gd(profile, absolute_path_weights, file_names)
|
|
988
1172
|
|
|
@@ -991,7 +1175,7 @@ class ModelDownloadManager:
|
|
|
991
1175
|
return absolute_path_weights
|
|
992
1176
|
|
|
993
1177
|
@staticmethod
|
|
994
|
-
def load_model_from_hf_hub(profile: ModelProfile, absolute_path:
|
|
1178
|
+
def load_model_from_hf_hub(profile: ModelProfile, absolute_path: PathLikeOrStr, file_names: list[str]) -> None:
|
|
995
1179
|
"""
|
|
996
1180
|
Load a model from the Huggingface hub for a given profile and saves the model at the directory of the given
|
|
997
1181
|
path.
|
|
@@ -1017,7 +1201,7 @@ class ModelDownloadManager:
|
|
|
1017
1201
|
)
|
|
1018
1202
|
|
|
1019
1203
|
@staticmethod
|
|
1020
|
-
def _load_from_gd(profile: ModelProfile, absolute_path:
|
|
1204
|
+
def _load_from_gd(profile: ModelProfile, absolute_path: PathLikeOrStr, file_names: list[str]) -> None:
|
|
1021
1205
|
if profile.urls is None:
|
|
1022
1206
|
raise ValueError("urls cannot be None")
|
|
1023
1207
|
for size, url, file_name in zip(profile.size, profile.urls, file_names):
|
|
@@ -1025,7 +1209,7 @@ class ModelDownloadManager:
|
|
|
1025
1209
|
download(str(url), directory, file_name, int(size))
|
|
1026
1210
|
|
|
1027
1211
|
@staticmethod
|
|
1028
|
-
def load_configs_from_hf_hub(profile: ModelProfile, absolute_path:
|
|
1212
|
+
def load_configs_from_hf_hub(profile: ModelProfile, absolute_path: PathLikeOrStr) -> None:
|
|
1029
1213
|
"""
|
|
1030
1214
|
Load config file(s) from the Huggingface hub for a given profile and saves the model at the directory of the
|
|
1031
1215
|
given path.
|
|
@@ -1044,9 +1228,11 @@ class ModelDownloadManager:
|
|
|
1044
1228
|
ModelDownloadManager._load_from_hf_hub(repo_id, file_name, directory)
|
|
1045
1229
|
|
|
1046
1230
|
@staticmethod
|
|
1047
|
-
def _load_from_hf_hub(
|
|
1231
|
+
def _load_from_hf_hub(
|
|
1232
|
+
repo_id: str, file_name: str, cache_directory: PathLikeOrStr, force_download: bool = False
|
|
1233
|
+
) -> int:
|
|
1048
1234
|
url = hf_hub_url(repo_id=repo_id, filename=file_name)
|
|
1049
|
-
token = os.environ.get("HF_CREDENTIALS")
|
|
1235
|
+
token = os.environ.get("HF_CREDENTIALS", None)
|
|
1050
1236
|
f_path = cached_download(
|
|
1051
1237
|
url,
|
|
1052
1238
|
cache_dir=cache_directory,
|