deepdoctection 0.42.0__py3-none-any.whl → 0.43__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of deepdoctection might be problematic. Click here for more details.
- deepdoctection/__init__.py +2 -1
- deepdoctection/analyzer/__init__.py +2 -1
- deepdoctection/analyzer/config.py +904 -0
- deepdoctection/analyzer/dd.py +36 -62
- deepdoctection/analyzer/factory.py +311 -141
- deepdoctection/configs/conf_dd_one.yaml +100 -44
- deepdoctection/configs/profiles.jsonl +32 -0
- deepdoctection/dataflow/__init__.py +9 -6
- deepdoctection/dataflow/base.py +33 -15
- deepdoctection/dataflow/common.py +96 -75
- deepdoctection/dataflow/custom.py +36 -29
- deepdoctection/dataflow/custom_serialize.py +135 -91
- deepdoctection/dataflow/parallel_map.py +33 -31
- deepdoctection/dataflow/serialize.py +15 -10
- deepdoctection/dataflow/stats.py +41 -28
- deepdoctection/datapoint/__init__.py +4 -6
- deepdoctection/datapoint/annotation.py +104 -66
- deepdoctection/datapoint/box.py +190 -130
- deepdoctection/datapoint/convert.py +66 -39
- deepdoctection/datapoint/image.py +151 -95
- deepdoctection/datapoint/view.py +383 -236
- deepdoctection/datasets/__init__.py +2 -6
- deepdoctection/datasets/adapter.py +11 -11
- deepdoctection/datasets/base.py +118 -81
- deepdoctection/datasets/dataflow_builder.py +18 -12
- deepdoctection/datasets/info.py +76 -57
- deepdoctection/datasets/instances/__init__.py +6 -2
- deepdoctection/datasets/instances/doclaynet.py +17 -14
- deepdoctection/datasets/instances/fintabnet.py +16 -22
- deepdoctection/datasets/instances/funsd.py +11 -6
- deepdoctection/datasets/instances/iiitar13k.py +9 -9
- deepdoctection/datasets/instances/layouttest.py +9 -9
- deepdoctection/datasets/instances/publaynet.py +9 -9
- deepdoctection/datasets/instances/pubtables1m.py +13 -13
- deepdoctection/datasets/instances/pubtabnet.py +13 -15
- deepdoctection/datasets/instances/rvlcdip.py +8 -8
- deepdoctection/datasets/instances/xfund.py +11 -9
- deepdoctection/datasets/registry.py +18 -11
- deepdoctection/datasets/save.py +12 -11
- deepdoctection/eval/__init__.py +3 -2
- deepdoctection/eval/accmetric.py +72 -52
- deepdoctection/eval/base.py +29 -10
- deepdoctection/eval/cocometric.py +14 -12
- deepdoctection/eval/eval.py +56 -41
- deepdoctection/eval/registry.py +6 -3
- deepdoctection/eval/tedsmetric.py +24 -9
- deepdoctection/eval/tp_eval_callback.py +13 -12
- deepdoctection/extern/__init__.py +1 -1
- deepdoctection/extern/base.py +176 -97
- deepdoctection/extern/d2detect.py +127 -92
- deepdoctection/extern/deskew.py +19 -10
- deepdoctection/extern/doctrocr.py +157 -106
- deepdoctection/extern/fastlang.py +25 -17
- deepdoctection/extern/hfdetr.py +137 -60
- deepdoctection/extern/hflayoutlm.py +329 -248
- deepdoctection/extern/hflm.py +67 -33
- deepdoctection/extern/model.py +108 -762
- deepdoctection/extern/pdftext.py +37 -12
- deepdoctection/extern/pt/nms.py +15 -1
- deepdoctection/extern/pt/ptutils.py +13 -9
- deepdoctection/extern/tessocr.py +87 -54
- deepdoctection/extern/texocr.py +29 -14
- deepdoctection/extern/tp/tfutils.py +36 -8
- deepdoctection/extern/tp/tpcompat.py +54 -16
- deepdoctection/extern/tp/tpfrcnn/config/config.py +20 -4
- deepdoctection/extern/tpdetect.py +4 -2
- deepdoctection/mapper/__init__.py +1 -1
- deepdoctection/mapper/cats.py +117 -76
- deepdoctection/mapper/cocostruct.py +35 -17
- deepdoctection/mapper/d2struct.py +56 -29
- deepdoctection/mapper/hfstruct.py +32 -19
- deepdoctection/mapper/laylmstruct.py +221 -185
- deepdoctection/mapper/maputils.py +71 -35
- deepdoctection/mapper/match.py +76 -62
- deepdoctection/mapper/misc.py +68 -44
- deepdoctection/mapper/pascalstruct.py +13 -12
- deepdoctection/mapper/prodigystruct.py +33 -19
- deepdoctection/mapper/pubstruct.py +42 -32
- deepdoctection/mapper/tpstruct.py +39 -19
- deepdoctection/mapper/xfundstruct.py +20 -13
- deepdoctection/pipe/__init__.py +1 -2
- deepdoctection/pipe/anngen.py +104 -62
- deepdoctection/pipe/base.py +226 -107
- deepdoctection/pipe/common.py +206 -123
- deepdoctection/pipe/concurrency.py +74 -47
- deepdoctection/pipe/doctectionpipe.py +108 -47
- deepdoctection/pipe/language.py +41 -24
- deepdoctection/pipe/layout.py +45 -18
- deepdoctection/pipe/lm.py +146 -78
- deepdoctection/pipe/order.py +196 -113
- deepdoctection/pipe/refine.py +111 -63
- deepdoctection/pipe/registry.py +1 -1
- deepdoctection/pipe/segment.py +213 -142
- deepdoctection/pipe/sub_layout.py +76 -46
- deepdoctection/pipe/text.py +52 -33
- deepdoctection/pipe/transform.py +8 -6
- deepdoctection/train/d2_frcnn_train.py +87 -69
- deepdoctection/train/hf_detr_train.py +72 -40
- deepdoctection/train/hf_layoutlm_train.py +85 -46
- deepdoctection/train/tp_frcnn_train.py +56 -28
- deepdoctection/utils/concurrency.py +59 -16
- deepdoctection/utils/context.py +40 -19
- deepdoctection/utils/develop.py +25 -17
- deepdoctection/utils/env_info.py +85 -36
- deepdoctection/utils/error.py +16 -10
- deepdoctection/utils/file_utils.py +246 -62
- deepdoctection/utils/fs.py +162 -43
- deepdoctection/utils/identifier.py +29 -16
- deepdoctection/utils/logger.py +49 -32
- deepdoctection/utils/metacfg.py +83 -21
- deepdoctection/utils/pdf_utils.py +119 -62
- deepdoctection/utils/settings.py +24 -10
- deepdoctection/utils/tqdm.py +10 -5
- deepdoctection/utils/transform.py +182 -46
- deepdoctection/utils/utils.py +61 -28
- deepdoctection/utils/viz.py +150 -104
- deepdoctection-0.43.dist-info/METADATA +376 -0
- deepdoctection-0.43.dist-info/RECORD +149 -0
- {deepdoctection-0.42.0.dist-info → deepdoctection-0.43.dist-info}/WHEEL +1 -1
- deepdoctection/analyzer/_config.py +0 -146
- deepdoctection-0.42.0.dist-info/METADATA +0 -431
- deepdoctection-0.42.0.dist-info/RECORD +0 -148
- {deepdoctection-0.42.0.dist-info → deepdoctection-0.43.dist-info}/licenses/LICENSE +0 -0
- {deepdoctection-0.42.0.dist-info → deepdoctection-0.43.dist-info}/top_level.txt +0 -0
deepdoctection/extern/model.py
CHANGED
|
@@ -16,7 +16,7 @@
|
|
|
16
16
|
# limitations under the License.
|
|
17
17
|
|
|
18
18
|
"""
|
|
19
|
-
|
|
19
|
+
`ModelCatalog` and`ModelDownloadManager`
|
|
20
20
|
"""
|
|
21
21
|
|
|
22
22
|
import os
|
|
@@ -28,9 +28,16 @@ from huggingface_hub import hf_hub_download
|
|
|
28
28
|
from tabulate import tabulate
|
|
29
29
|
from termcolor import colored
|
|
30
30
|
|
|
31
|
-
from ..utils.fs import
|
|
31
|
+
from ..utils.fs import (
|
|
32
|
+
download,
|
|
33
|
+
get_cache_dir_path,
|
|
34
|
+
get_configs_dir_path,
|
|
35
|
+
get_package_path,
|
|
36
|
+
get_weights_dir_path,
|
|
37
|
+
maybe_copy_config_to_cache,
|
|
38
|
+
)
|
|
32
39
|
from ..utils.logger import LoggingRecord, log_once, logger
|
|
33
|
-
from ..utils.settings import
|
|
40
|
+
from ..utils.settings import ObjectTypes, get_type
|
|
34
41
|
from ..utils.types import PathLikeOrStr
|
|
35
42
|
|
|
36
43
|
__all__ = ["ModelCatalog", "ModelDownloadManager", "print_model_infos", "ModelProfile"]
|
|
@@ -39,7 +46,7 @@ __all__ = ["ModelCatalog", "ModelDownloadManager", "print_model_infos", "ModelPr
|
|
|
39
46
|
@dataclass(frozen=True)
|
|
40
47
|
class ModelProfile:
|
|
41
48
|
"""
|
|
42
|
-
Class for model profile. Add for each model one ModelProfile to the ModelCatalog
|
|
49
|
+
Class for model profile. Add for each model one `ModelProfile` to the `ModelCatalog`
|
|
43
50
|
"""
|
|
44
51
|
|
|
45
52
|
name: str
|
|
@@ -58,10 +65,12 @@ class ModelProfile:
|
|
|
58
65
|
dl_library: Optional[str] = field(default=None)
|
|
59
66
|
model_wrapper: Optional[str] = field(default=None)
|
|
60
67
|
architecture: Optional[str] = field(default=None)
|
|
68
|
+
padding: Optional[bool] = field(default=None)
|
|
61
69
|
|
|
62
70
|
def as_dict(self) -> dict[str, Any]:
|
|
63
71
|
"""
|
|
64
|
-
|
|
72
|
+
Returns:
|
|
73
|
+
A dict of the dataclass
|
|
65
74
|
"""
|
|
66
75
|
return asdict(self)
|
|
67
76
|
|
|
@@ -72,757 +81,56 @@ class ModelCatalog:
|
|
|
72
81
|
|
|
73
82
|
To get an overview of all registered models
|
|
74
83
|
|
|
84
|
+
Example:
|
|
85
|
+
```python
|
|
75
86
|
print(ModelCatalog.get_model_list())
|
|
87
|
+
```
|
|
76
88
|
|
|
77
89
|
To get a model card for some specific model:
|
|
78
90
|
|
|
91
|
+
Example:
|
|
92
|
+
```python
|
|
79
93
|
profile = ModelCatalog.get_profile("layout/model-800000_inf_only.data-00000-of-00001")
|
|
80
94
|
print(profile.description)
|
|
95
|
+
```
|
|
81
96
|
|
|
82
97
|
Some models will have their weights and configs stored in the cache. To instantiate predictors one will sometimes
|
|
83
98
|
need their path. Use
|
|
84
99
|
|
|
100
|
+
Example:
|
|
101
|
+
```python
|
|
85
102
|
path_weights = ModelCatalog.get_full_path_configs("layout/model-800000_inf_only.data-00000-of-00001")
|
|
86
103
|
path_configs = ModelCatalog.get_full_path_weights("layout/model-800000_inf_only.data-00000-of-00001")
|
|
104
|
+
```
|
|
87
105
|
|
|
88
106
|
To register a new model
|
|
89
107
|
|
|
108
|
+
Example:
|
|
109
|
+
```python
|
|
90
110
|
ModelCatalog.get_full_path_configs("my_new_model")
|
|
111
|
+
```
|
|
112
|
+
|
|
113
|
+
Attributes:
|
|
114
|
+
CATALOG (dict[str, ModelProfile]): A dict of model profiles. The key is the model name and the value is a
|
|
115
|
+
`ModelProfile` object.
|
|
91
116
|
"""
|
|
92
117
|
|
|
93
|
-
CATALOG: dict[str, ModelProfile] = {
|
|
94
|
-
"layout/model-800000_inf_only.data-00000-of-00001": ModelProfile(
|
|
95
|
-
name="layout/model-800000_inf_only.data-00000-of-00001",
|
|
96
|
-
description="Tensorpack layout model for inference purposes trained on Publaynet",
|
|
97
|
-
config="dd/tp/conf_frcnn_layout.yaml",
|
|
98
|
-
size=[274552244, 7907],
|
|
99
|
-
tp_model=True,
|
|
100
|
-
hf_repo_id="deepdoctection/tp_casc_rcnn_X_32xd4_50_FPN_GN_2FC_publaynet_inference_only",
|
|
101
|
-
hf_model_name="model-800000_inf_only",
|
|
102
|
-
hf_config_file=["conf_frcnn_layout.yaml"],
|
|
103
|
-
categories={
|
|
104
|
-
1: LayoutType.TEXT,
|
|
105
|
-
2: LayoutType.TITLE,
|
|
106
|
-
3: LayoutType.LIST,
|
|
107
|
-
4: LayoutType.TABLE,
|
|
108
|
-
5: LayoutType.FIGURE,
|
|
109
|
-
},
|
|
110
|
-
dl_library="TF",
|
|
111
|
-
model_wrapper="TPFrcnnDetector",
|
|
112
|
-
),
|
|
113
|
-
"cell/model-1800000_inf_only.data-00000-of-00001": ModelProfile(
|
|
114
|
-
name="cell/model-1800000_inf_only.data-00000-of-00001",
|
|
115
|
-
description="Tensorpack cell detection model for inference purposes trained on Pubtabnet",
|
|
116
|
-
config="dd/tp/conf_frcnn_cell.yaml",
|
|
117
|
-
size=[274503056, 8056],
|
|
118
|
-
tp_model=True,
|
|
119
|
-
hf_repo_id="deepdoctection/tp_casc_rcnn_X_32xd4_50_FPN_GN_2FC_pubtabnet_c_inference_only",
|
|
120
|
-
hf_model_name="model-1800000_inf_only",
|
|
121
|
-
hf_config_file=["conf_frcnn_cell.yaml"],
|
|
122
|
-
categories={1: LayoutType.CELL},
|
|
123
|
-
dl_library="TF",
|
|
124
|
-
model_wrapper="TPFrcnnDetector",
|
|
125
|
-
),
|
|
126
|
-
"item/model-1620000_inf_only.data-00000-of-00001": ModelProfile(
|
|
127
|
-
name="item/model-1620000_inf_only.data-00000-of-00001",
|
|
128
|
-
description="Tensorpack row/column detection model for inference purposes trained on Pubtabnet",
|
|
129
|
-
config="dd/tp/conf_frcnn_rows.yaml",
|
|
130
|
-
size=[274515344, 7904],
|
|
131
|
-
tp_model=True,
|
|
132
|
-
hf_repo_id="deepdoctection/tp_casc_rcnn_X_32xd4_50_FPN_GN_2FC_pubtabnet_rc_inference_only",
|
|
133
|
-
hf_model_name="model-1620000_inf_only",
|
|
134
|
-
hf_config_file=["conf_frcnn_rows.yaml"],
|
|
135
|
-
categories={1: LayoutType.ROW, 2: LayoutType.COLUMN},
|
|
136
|
-
dl_library="TF",
|
|
137
|
-
model_wrapper="TPFrcnnDetector",
|
|
138
|
-
),
|
|
139
|
-
"layout/d2_model_0829999_layout_inf_only.pt": ModelProfile(
|
|
140
|
-
name="layout/d2_model_0829999_layout_inf_only.pt",
|
|
141
|
-
description="Detectron2 layout detection model trained on Publaynet",
|
|
142
|
-
config="dd/d2/layout/CASCADE_RCNN_R_50_FPN_GN.yaml",
|
|
143
|
-
size=[274632215],
|
|
144
|
-
tp_model=False,
|
|
145
|
-
hf_repo_id="deepdoctection/d2_casc_rcnn_X_32xd4_50_FPN_GN_2FC_publaynet_inference_only",
|
|
146
|
-
hf_model_name="d2_model_0829999_layout_inf_only.pt",
|
|
147
|
-
hf_config_file=["Base-RCNN-FPN.yaml", "CASCADE_RCNN_R_50_FPN_GN.yaml"],
|
|
148
|
-
categories={
|
|
149
|
-
1: LayoutType.TEXT,
|
|
150
|
-
2: LayoutType.TITLE,
|
|
151
|
-
3: LayoutType.LIST,
|
|
152
|
-
4: LayoutType.TABLE,
|
|
153
|
-
5: LayoutType.FIGURE,
|
|
154
|
-
},
|
|
155
|
-
dl_library="PT",
|
|
156
|
-
model_wrapper="D2FrcnnDetector",
|
|
157
|
-
),
|
|
158
|
-
"layout/d2_model_0829999_layout_inf_only.ts": ModelProfile(
|
|
159
|
-
name="layout/d2_model_0829999_layout_inf_only.ts",
|
|
160
|
-
description="Detectron2 layout detection model trained on Publaynet. Torchscript export",
|
|
161
|
-
config="dd/d2/layout/CASCADE_RCNN_R_50_FPN_GN_TS.yaml",
|
|
162
|
-
size=[274974842],
|
|
163
|
-
tp_model=False,
|
|
164
|
-
hf_repo_id="deepdoctection/d2_casc_rcnn_X_32xd4_50_FPN_GN_2FC_publaynet_inference_only",
|
|
165
|
-
hf_model_name="d2_model_0829999_layout_inf_only.ts",
|
|
166
|
-
hf_config_file=["CASCADE_RCNN_R_50_FPN_GN_TS.yaml"],
|
|
167
|
-
categories={
|
|
168
|
-
1: LayoutType.TEXT,
|
|
169
|
-
2: LayoutType.TITLE,
|
|
170
|
-
3: LayoutType.LIST,
|
|
171
|
-
4: LayoutType.TABLE,
|
|
172
|
-
5: LayoutType.FIGURE,
|
|
173
|
-
},
|
|
174
|
-
dl_library="PT",
|
|
175
|
-
model_wrapper="D2FrcnnTracingDetector",
|
|
176
|
-
),
|
|
177
|
-
"cell/d2_model_1849999_cell_inf_only.pt": ModelProfile(
|
|
178
|
-
name="cell/d2_model_1849999_cell_inf_only.pt",
|
|
179
|
-
description="Detectron2 cell detection inference only model trained on Pubtabnet",
|
|
180
|
-
config="dd/d2/cell/CASCADE_RCNN_R_50_FPN_GN.yaml",
|
|
181
|
-
size=[274583063],
|
|
182
|
-
tp_model=False,
|
|
183
|
-
hf_repo_id="deepdoctection/d2_casc_rcnn_X_32xd4_50_FPN_GN_2FC_pubtabnet_c_inference_only",
|
|
184
|
-
hf_model_name="d2_model_1849999_cell_inf_only.pt",
|
|
185
|
-
hf_config_file=["Base-RCNN-FPN.yaml", "CASCADE_RCNN_R_50_FPN_GN.yaml"],
|
|
186
|
-
categories={1: LayoutType.CELL},
|
|
187
|
-
dl_library="PT",
|
|
188
|
-
model_wrapper="D2FrcnnDetector",
|
|
189
|
-
),
|
|
190
|
-
"cell/d2_model_1849999_cell_inf_only.ts": ModelProfile(
|
|
191
|
-
name="cell/d2_model_1849999_cell_inf_only.ts",
|
|
192
|
-
description="Detectron2 cell detection inference only model trained on Pubtabnet. Torchscript export",
|
|
193
|
-
config="dd/d2/cell/CASCADE_RCNN_R_50_FPN_GN_TS.yaml",
|
|
194
|
-
size=[274898682],
|
|
195
|
-
tp_model=False,
|
|
196
|
-
hf_repo_id="deepdoctection/d2_casc_rcnn_X_32xd4_50_FPN_GN_2FC_pubtabnet_c_inference_only",
|
|
197
|
-
hf_model_name="d2_model_1849999_cell_inf_only.ts",
|
|
198
|
-
hf_config_file=["CASCADE_RCNN_R_50_FPN_GN_TS.yaml"],
|
|
199
|
-
categories={1: LayoutType.CELL},
|
|
200
|
-
dl_library="PT",
|
|
201
|
-
model_wrapper="D2FrcnnTracingDetector",
|
|
202
|
-
),
|
|
203
|
-
"item/d2_model_1639999_item_inf_only.pt": ModelProfile(
|
|
204
|
-
name="item/d2_model_1639999_item_inf_only.pt",
|
|
205
|
-
description="Detectron2 item detection model inference only trained on Pubtabnet",
|
|
206
|
-
config="dd/d2/item/CASCADE_RCNN_R_50_FPN_GN.yaml",
|
|
207
|
-
size=[274595351],
|
|
208
|
-
tp_model=False,
|
|
209
|
-
hf_repo_id="deepdoctection/d2_casc_rcnn_X_32xd4_50_FPN_GN_2FC_pubtabnet_rc_inference_only",
|
|
210
|
-
hf_model_name="d2_model_1639999_item_inf_only.pt",
|
|
211
|
-
hf_config_file=["Base-RCNN-FPN.yaml", "CASCADE_RCNN_R_50_FPN_GN.yaml"],
|
|
212
|
-
categories={1: LayoutType.ROW, 2: LayoutType.COLUMN},
|
|
213
|
-
dl_library="PT",
|
|
214
|
-
model_wrapper="D2FrcnnDetector",
|
|
215
|
-
),
|
|
216
|
-
"item/d2_model_1639999_item_inf_only.ts": ModelProfile(
|
|
217
|
-
name="item/d2_model_1639999_item_inf_only.ts",
|
|
218
|
-
description="Detectron2 cell detection inference only model trained on Pubtabnet. Torchscript export",
|
|
219
|
-
config="dd/d2/item/CASCADE_RCNN_R_50_FPN_GN_TS.yaml",
|
|
220
|
-
size=[274910970],
|
|
221
|
-
tp_model=False,
|
|
222
|
-
hf_repo_id="deepdoctection/d2_casc_rcnn_X_32xd4_50_FPN_GN_2FC_pubtabnet_rc_inference_only",
|
|
223
|
-
hf_model_name="d2_model_1639999_item_inf_only.ts",
|
|
224
|
-
hf_config_file=["CASCADE_RCNN_R_50_FPN_GN_TS.yaml"],
|
|
225
|
-
categories={1: LayoutType.ROW, 2: LayoutType.COLUMN},
|
|
226
|
-
dl_library="PT",
|
|
227
|
-
model_wrapper="D2FrcnnTracingDetector",
|
|
228
|
-
),
|
|
229
|
-
"nielsr/lilt-xlm-roberta-base/pytorch_model.bin": ModelProfile(
|
|
230
|
-
name="nielsr/lilt-xlm-roberta-base/pytorch_model.bin",
|
|
231
|
-
description="LiLT build with a RobertaXLM base model",
|
|
232
|
-
config="nielsr/lilt-xlm-roberta-base/config.json",
|
|
233
|
-
size=[1136743583],
|
|
234
|
-
tp_model=False,
|
|
235
|
-
hf_repo_id="nielsr/lilt-xlm-roberta-base",
|
|
236
|
-
hf_model_name="pytorch_model.bin",
|
|
237
|
-
hf_config_file=["config.json"],
|
|
238
|
-
dl_library="PT",
|
|
239
|
-
),
|
|
240
|
-
"SCUT-DLVCLab/lilt-infoxlm-base/pytorch_model.bin": ModelProfile(
|
|
241
|
-
name="SCUT-DLVCLab/lilt-infoxlm-base/pytorch_model.bin",
|
|
242
|
-
description="Language-Independent Layout Transformer - InfoXLM model by stitching a pre-trained InfoXLM"
|
|
243
|
-
" and a pre-trained Language-Independent Layout Transformer (LiLT) together. It was introduced"
|
|
244
|
-
" in the paper LiLT: A Simple yet Effective Language-Independent Layout Transformer for"
|
|
245
|
-
" Structured Document Understanding by Wang et al. and first released in this repository.",
|
|
246
|
-
config="SCUT-DLVCLab/lilt-infoxlm-base/config.json",
|
|
247
|
-
size=[1136743583],
|
|
248
|
-
tp_model=False,
|
|
249
|
-
hf_repo_id="SCUT-DLVCLab/lilt-infoxlm-base",
|
|
250
|
-
hf_model_name="pytorch_model.bin",
|
|
251
|
-
hf_config_file=["config.json"],
|
|
252
|
-
dl_library="PT",
|
|
253
|
-
),
|
|
254
|
-
"SCUT-DLVCLab/lilt-roberta-en-base/pytorch_model.bin": ModelProfile(
|
|
255
|
-
name="SCUT-DLVCLab/lilt-roberta-en-base/pytorch_model.bin",
|
|
256
|
-
description="Language-Independent Layout Transformer - RoBERTa model by stitching a pre-trained RoBERTa"
|
|
257
|
-
" (English) and a pre-trained Language-Independent Layout Transformer (LiLT) together. It was"
|
|
258
|
-
" introduced in the paper LiLT: A Simple yet Effective Language-Independent Layout Transformer"
|
|
259
|
-
" for Structured Document Understanding by Wang et al. and first released in this repository.",
|
|
260
|
-
config="SCUT-DLVCLab/lilt-roberta-en-base/config.json",
|
|
261
|
-
size=[523151519],
|
|
262
|
-
tp_model=False,
|
|
263
|
-
hf_repo_id="SCUT-DLVCLab/lilt-roberta-en-base",
|
|
264
|
-
hf_model_name="pytorch_model.bin",
|
|
265
|
-
hf_config_file=["config.json"],
|
|
266
|
-
dl_library="PT",
|
|
267
|
-
),
|
|
268
|
-
"microsoft/layoutlm-base-uncased/pytorch_model.bin": ModelProfile(
|
|
269
|
-
name="microsoft/layoutlm-base-uncased/pytorch_model.bin",
|
|
270
|
-
description="LayoutLM is a simple but effective pre-training method of text and layout for document image"
|
|
271
|
-
" understanding and information extraction tasks, such as form understanding and receipt"
|
|
272
|
-
" understanding. LayoutLM archived the SOTA results on multiple datasets. This model does not"
|
|
273
|
-
"contain any head and has to be fine tuned on a downstream task. This is model has been trained "
|
|
274
|
-
"on 11M documents for 2 epochs. Configuration: 12-layer, 768-hidden, 12-heads, 113M parameters",
|
|
275
|
-
size=[453093832],
|
|
276
|
-
tp_model=False,
|
|
277
|
-
config="microsoft/layoutlm-base-uncased/config.json",
|
|
278
|
-
hf_repo_id="microsoft/layoutlm-base-uncased",
|
|
279
|
-
hf_model_name="pytorch_model.bin",
|
|
280
|
-
hf_config_file=["config.json"],
|
|
281
|
-
dl_library="PT",
|
|
282
|
-
),
|
|
283
|
-
"microsoft/layoutlm-large-uncased/pytorch_model.bin": ModelProfile(
|
|
284
|
-
name="microsoft/layoutlm-large-uncased/pytorch_model.bin",
|
|
285
|
-
description="LayoutLM is a simple but effective pre-training method of text and layout for document image"
|
|
286
|
-
" understanding and information extraction tasks, such as form understanding and receipt"
|
|
287
|
-
" understanding. LayoutLM archived the SOTA results on multiple datasets. This model does not"
|
|
288
|
-
"contain any head and has to be fine tuned on a downstream task. This is model has been trained"
|
|
289
|
-
" on 11M documents for 2 epochs. Configuration: 24-layer, 1024-hidden, 16-heads, 343M parameters",
|
|
290
|
-
size=[1361845448],
|
|
291
|
-
tp_model=False,
|
|
292
|
-
config="microsoft/layoutlm-large-uncased/config.json",
|
|
293
|
-
hf_repo_id="microsoft/layoutlm-large-uncased",
|
|
294
|
-
hf_model_name="pytorch_model.bin",
|
|
295
|
-
hf_config_file=["config.json"],
|
|
296
|
-
dl_library="PT",
|
|
297
|
-
),
|
|
298
|
-
"microsoft/layoutlmv2-base-uncased/pytorch_model.bin": ModelProfile(
|
|
299
|
-
name="microsoft/layoutlmv2-base-uncased/pytorch_model.bin",
|
|
300
|
-
description="LayoutLMv2 is an improved version of LayoutLM with new pre-training tasks to model the"
|
|
301
|
-
" interaction among text, layout, and image in a single multi-modal framework. It outperforms"
|
|
302
|
-
" strong baselines and achieves new state-of-the-art results on a wide variety of downstream"
|
|
303
|
-
" visually-rich document understanding tasks, including , including FUNSD (0.7895 → 0.8420),"
|
|
304
|
-
" CORD (0.9493 → 0.9601), SROIE (0.9524 → 0.9781), Kleister-NDA (0.834 → 0.852), RVL-CDIP"
|
|
305
|
-
" (0.9443 → 0.9564), and DocVQA (0.7295 → 0.8672). The license is cc-by-nc-sa-4.0",
|
|
306
|
-
size=[802243295],
|
|
307
|
-
tp_model=False,
|
|
308
|
-
config="microsoft/layoutlmv2-base-uncased/config.json",
|
|
309
|
-
hf_repo_id="microsoft/layoutlmv2-base-uncased",
|
|
310
|
-
hf_model_name="pytorch_model.bin",
|
|
311
|
-
hf_config_file=["config.json"],
|
|
312
|
-
dl_library="PT",
|
|
313
|
-
),
|
|
314
|
-
"microsoft/layoutxlm-base/pytorch_model.bin": ModelProfile(
|
|
315
|
-
name="microsoft/layoutxlm-base/pytorch_model.bin",
|
|
316
|
-
description="Multimodal pre-training with text, layout, and image has achieved SOTA performance for "
|
|
317
|
-
"visually-rich document understanding tasks recently, which demonstrates the great potential"
|
|
318
|
-
" for joint learning across different modalities. In this paper, we present LayoutXLM, a"
|
|
319
|
-
" multimodal pre-trained model for multilingual document understanding, which aims to bridge"
|
|
320
|
-
" the language barriers for visually-rich document understanding. To accurately evaluate"
|
|
321
|
-
" LayoutXLM, we also introduce a multilingual form understanding benchmark dataset named XFUN,"
|
|
322
|
-
" which includes form understanding samples in 7 languages (Chinese, Japanese, Spanish, French,"
|
|
323
|
-
" Italian, German, Portuguese), and key-value pairs are manually labeled for each language."
|
|
324
|
-
" Experiment results show that the LayoutXLM model has significantly outperformed the existing"
|
|
325
|
-
" SOTA cross-lingual pre-trained models on the XFUN dataset. The license is cc-by-nc-sa-4.0",
|
|
326
|
-
size=[1476537178],
|
|
327
|
-
tp_model=False,
|
|
328
|
-
config="microsoft/layoutxlm-base/config.json",
|
|
329
|
-
hf_repo_id="microsoft/layoutxlm-base",
|
|
330
|
-
hf_model_name="pytorch_model.bin",
|
|
331
|
-
hf_config_file=["config.json"],
|
|
332
|
-
dl_library="PT",
|
|
333
|
-
),
|
|
334
|
-
"microsoft/layoutlmv3-base/pytorch_model.bin": ModelProfile(
|
|
335
|
-
name="microsoft/layoutlmv3-base/pytorch_model.bin",
|
|
336
|
-
description="LayoutLMv3 is a pre-trained multimodal Transformer for Document AI with unified text and"
|
|
337
|
-
" image masking. The simple unified architecture and training objectives make LayoutLMv3 a"
|
|
338
|
-
" general-purpose pre-trained model. For example, LayoutLMv3 can be fine-tuned for both"
|
|
339
|
-
" text-centric tasks, including form understanding, receipt understanding, and document"
|
|
340
|
-
" visual question answering, and image-centric tasks such as document image classification"
|
|
341
|
-
" and document layout analysis. The license is cc-by-nc-sa-4.0",
|
|
342
|
-
size=[501380823],
|
|
343
|
-
tp_model=False,
|
|
344
|
-
config="microsoft/layoutlmv3-base/config.json",
|
|
345
|
-
hf_repo_id="microsoft/layoutlmv3-base",
|
|
346
|
-
hf_model_name="pytorch_model.bin",
|
|
347
|
-
hf_config_file=["config.json"],
|
|
348
|
-
dl_library="PT",
|
|
349
|
-
),
|
|
350
|
-
"microsoft/table-transformer-detection/pytorch_model.bin": ModelProfile(
|
|
351
|
-
name="microsoft/table-transformer-detection/pytorch_model.bin",
|
|
352
|
-
description="Table Transformer (DETR) model trained on PubTables1M. It was introduced in the paper "
|
|
353
|
-
"PubTables-1M: Towards Comprehensive Table Extraction From Unstructured Documents by Smock et "
|
|
354
|
-
"al. This model is devoted to table detection",
|
|
355
|
-
size=[115393245],
|
|
356
|
-
tp_model=False,
|
|
357
|
-
config="microsoft/table-transformer-detection/config.json",
|
|
358
|
-
preprocessor_config="microsoft/table-transformer-detection/preprocessor_config.json",
|
|
359
|
-
hf_repo_id="microsoft/table-transformer-detection",
|
|
360
|
-
hf_model_name="pytorch_model.bin",
|
|
361
|
-
hf_config_file=["config.json", "preprocessor_config.json"],
|
|
362
|
-
categories={1: LayoutType.TABLE, 2: LayoutType.TABLE_ROTATED},
|
|
363
|
-
dl_library="PT",
|
|
364
|
-
model_wrapper="HFDetrDerivedDetector",
|
|
365
|
-
),
|
|
366
|
-
"microsoft/table-transformer-structure-recognition/pytorch_model.bin": ModelProfile(
|
|
367
|
-
name="microsoft/table-transformer-structure-recognition/pytorch_model.bin",
|
|
368
|
-
description="Table Transformer (DETR) model trained on PubTables1M. It was introduced in the paper "
|
|
369
|
-
"PubTables-1M: Towards Comprehensive Table Extraction From Unstructured Documents by Smock et "
|
|
370
|
-
"al. This model is devoted to table structure recognition and assumes to receive a cropped"
|
|
371
|
-
"table as input. It will predict rows, column and spanning cells",
|
|
372
|
-
size=[115509981],
|
|
373
|
-
tp_model=False,
|
|
374
|
-
config="microsoft/table-transformer-structure-recognition/config.json",
|
|
375
|
-
preprocessor_config="microsoft/table-transformer-structure-recognition/preprocessor_config.json",
|
|
376
|
-
hf_repo_id="microsoft/table-transformer-structure-recognition",
|
|
377
|
-
hf_model_name="pytorch_model.bin",
|
|
378
|
-
hf_config_file=["config.json", "preprocessor_config.json"],
|
|
379
|
-
categories={
|
|
380
|
-
1: LayoutType.TABLE,
|
|
381
|
-
2: LayoutType.COLUMN,
|
|
382
|
-
3: LayoutType.ROW,
|
|
383
|
-
4: CellType.COLUMN_HEADER,
|
|
384
|
-
5: CellType.PROJECTED_ROW_HEADER,
|
|
385
|
-
6: CellType.SPANNING,
|
|
386
|
-
},
|
|
387
|
-
dl_library="PT",
|
|
388
|
-
model_wrapper="HFDetrDerivedDetector",
|
|
389
|
-
),
|
|
390
|
-
"doctr/db_resnet50/pt/db_resnet50-ac60cadc.pt": ModelProfile(
|
|
391
|
-
name="doctr/db_resnet50/pt/db_resnet50-ac60cadc.pt",
|
|
392
|
-
description="Doctr implementation of DBNet from “Real-time Scene Text Detection with Differentiable "
|
|
393
|
-
"Binarization”. For more information please check "
|
|
394
|
-
"https://mindee.github.io/doctr/using_doctr/using_models.html#. This is the Pytorch artefact.",
|
|
395
|
-
size=[101971449],
|
|
396
|
-
urls=["https://doctr-static.mindee.com/models?id=v0.3.1/db_resnet50-ac60cadc.pt&src=0"],
|
|
397
|
-
categories={1: LayoutType.WORD},
|
|
398
|
-
dl_library="PT",
|
|
399
|
-
model_wrapper="DoctrTextlineDetector",
|
|
400
|
-
architecture="db_resnet50",
|
|
401
|
-
),
|
|
402
|
-
"doctr/db_resnet50/tf/db_resnet50-adcafc63.zip": ModelProfile(
|
|
403
|
-
name="doctr/db_resnet50/tf/db_resnet50-adcafc63.zip",
|
|
404
|
-
description="Doctr implementation of DBNet from “Real-time Scene Text Detection with Differentiable "
|
|
405
|
-
"Binarization”. For more information please check "
|
|
406
|
-
"https://mindee.github.io/doctr/using_doctr/using_models.html#. This is the Tensorflow artefact.",
|
|
407
|
-
size=[94178964],
|
|
408
|
-
urls=["https://doctr-static.mindee.com/models?id=v0.2.0/db_resnet50-adcafc63.zip&src=0"],
|
|
409
|
-
categories={1: LayoutType.WORD},
|
|
410
|
-
dl_library="TF",
|
|
411
|
-
model_wrapper="DoctrTextlineDetector",
|
|
412
|
-
architecture="db_resnet50",
|
|
413
|
-
),
|
|
414
|
-
"doctr/crnn_vgg16_bn/pt/crnn_vgg16_bn-9762b0b0.pt": ModelProfile(
|
|
415
|
-
name="doctr/crnn_vgg16_bn/pt/crnn_vgg16_bn-9762b0b0.pt",
|
|
416
|
-
description="Doctr implementation of CRNN from “An End-to-End Trainable Neural Network for Image-based "
|
|
417
|
-
"Sequence Recognition and Its Application to Scene Text Recognition”. For more information "
|
|
418
|
-
"please check https://mindee.github.io/doctr/using_doctr/using_models.html#. This is the Pytorch "
|
|
419
|
-
"artefact.",
|
|
420
|
-
size=[63286381],
|
|
421
|
-
urls=["https://doctr-static.mindee.com/models?id=v0.3.1/crnn_vgg16_bn-9762b0b0.pt&src=0"],
|
|
422
|
-
dl_library="PT",
|
|
423
|
-
model_wrapper="DoctrTextRecognizer",
|
|
424
|
-
architecture="crnn_vgg16_bn",
|
|
425
|
-
),
|
|
426
|
-
"doctr/crnn_vgg16_bn/tf/crnn_vgg16_bn-76b7f2c6.zip": ModelProfile(
|
|
427
|
-
name="doctr/crnn_vgg16_bn/tf/crnn_vgg16_bn-76b7f2c6.zip",
|
|
428
|
-
description="Doctr implementation of CRNN from “An End-to-End Trainable Neural Network for Image-based "
|
|
429
|
-
"Sequence Recognition and Its Application to Scene Text Recognition”. For more information "
|
|
430
|
-
"please check https://mindee.github.io/doctr/using_doctr/using_models.html#. This is the Tensorflow "
|
|
431
|
-
"artefact.",
|
|
432
|
-
size=[58758994],
|
|
433
|
-
urls=["https://doctr-static.mindee.com/models?id=v0.3.0/crnn_vgg16_bn-76b7f2c6.zip&src=0"],
|
|
434
|
-
dl_library="TF",
|
|
435
|
-
model_wrapper="DoctrTextRecognizer",
|
|
436
|
-
architecture="crnn_vgg16_bn",
|
|
437
|
-
),
|
|
438
|
-
"FacebookAI/xlm-roberta-base": ModelProfile(
|
|
439
|
-
name="FacebookAI/xlm-roberta-base/pytorch_model.bin",
|
|
440
|
-
description="XLM-RoBERTa model pre-trained on 2.5TB of filtered CommonCrawl data containing 100 languages."
|
|
441
|
-
" It was introduced in the paper Unsupervised Cross-lingual Representation Learning at Scale"
|
|
442
|
-
" by Conneau et al. and first released in this repository.",
|
|
443
|
-
size=[1115590446],
|
|
444
|
-
tp_model=False,
|
|
445
|
-
config="FacebookAI/xlm-roberta-base/config.json",
|
|
446
|
-
hf_repo_id="FacebookAI/xlm-roberta-base",
|
|
447
|
-
hf_model_name="pytorch_model.bin",
|
|
448
|
-
hf_config_file=["config.json"],
|
|
449
|
-
dl_library="PT",
|
|
450
|
-
),
|
|
451
|
-
"fasttext/lid.176.bin": ModelProfile(
|
|
452
|
-
name="fasttext/lid.176.bin",
|
|
453
|
-
description="Fasttext language detection model",
|
|
454
|
-
size=[131266198],
|
|
455
|
-
urls=["https://dl.fbaipublicfiles.com/fasttext/supervised-models/lid.176.bin"],
|
|
456
|
-
categories={
|
|
457
|
-
1: Languages.ENGLISH,
|
|
458
|
-
2: Languages.RUSSIAN,
|
|
459
|
-
3: Languages.GERMAN,
|
|
460
|
-
4: Languages.FRENCH,
|
|
461
|
-
5: Languages.ITALIAN,
|
|
462
|
-
6: Languages.JAPANESE,
|
|
463
|
-
7: Languages.SPANISH,
|
|
464
|
-
8: Languages.CEBUANO,
|
|
465
|
-
9: Languages.TURKISH,
|
|
466
|
-
10: Languages.PORTUGUESE,
|
|
467
|
-
11: Languages.UKRAINIAN,
|
|
468
|
-
12: Languages.ESPERANTO,
|
|
469
|
-
13: Languages.POLISH,
|
|
470
|
-
14: Languages.SWEDISH,
|
|
471
|
-
15: Languages.DUTCH,
|
|
472
|
-
16: Languages.HEBREW,
|
|
473
|
-
17: Languages.CHINESE,
|
|
474
|
-
18: Languages.HUNGARIAN,
|
|
475
|
-
19: Languages.ARABIC,
|
|
476
|
-
20: Languages.CATALAN,
|
|
477
|
-
21: Languages.FINNISH,
|
|
478
|
-
22: Languages.CZECH,
|
|
479
|
-
23: Languages.PERSIAN,
|
|
480
|
-
24: Languages.SERBIAN,
|
|
481
|
-
25: Languages.GREEK,
|
|
482
|
-
26: Languages.VIETNAMESE,
|
|
483
|
-
27: Languages.BULGARIAN,
|
|
484
|
-
28: Languages.KOREAN,
|
|
485
|
-
29: Languages.NORWEGIAN,
|
|
486
|
-
30: Languages.MACEDONIAN,
|
|
487
|
-
31: Languages.ROMANIAN,
|
|
488
|
-
32: Languages.INDONESIAN,
|
|
489
|
-
33: Languages.THAI,
|
|
490
|
-
34: Languages.ARMENIAN,
|
|
491
|
-
35: Languages.DANISH,
|
|
492
|
-
36: Languages.TAMIL,
|
|
493
|
-
37: Languages.HINDI,
|
|
494
|
-
38: Languages.CROATIAN,
|
|
495
|
-
39: Languages.NOT_DEFINED,
|
|
496
|
-
40: Languages.BELARUSIAN,
|
|
497
|
-
41: Languages.GEORGIAN,
|
|
498
|
-
42: Languages.TELUGU,
|
|
499
|
-
43: Languages.KAZAKH,
|
|
500
|
-
44: Languages.WARAY,
|
|
501
|
-
45: Languages.LITHUANIAN,
|
|
502
|
-
46: Languages.SCOTTISH,
|
|
503
|
-
47: Languages.SLOVAK,
|
|
504
|
-
48: Languages.BENIN,
|
|
505
|
-
49: Languages.BASQUE,
|
|
506
|
-
50: Languages.SLOVENIAN,
|
|
507
|
-
51: Languages.NOT_DEFINED,
|
|
508
|
-
52: Languages.MALAYALAM,
|
|
509
|
-
53: Languages.MARATHI,
|
|
510
|
-
54: Languages.ESTONIAN,
|
|
511
|
-
55: Languages.AZERBAIJANI,
|
|
512
|
-
56: Languages.NOT_DEFINED,
|
|
513
|
-
57: Languages.ALBANIAN,
|
|
514
|
-
58: Languages.LATIN,
|
|
515
|
-
59: Languages.BOSNIAN,
|
|
516
|
-
60: Languages.NORWEGIAN_NOVOSIBIRSK,
|
|
517
|
-
61: Languages.URDU,
|
|
518
|
-
62: Languages.NOT_DEFINED,
|
|
519
|
-
63: Languages.NOT_DEFINED,
|
|
520
|
-
64: Languages.NOT_DEFINED,
|
|
521
|
-
65: Languages.NOT_DEFINED,
|
|
522
|
-
66: Languages.NOT_DEFINED,
|
|
523
|
-
67: Languages.NOT_DEFINED,
|
|
524
|
-
68: Languages.NOT_DEFINED,
|
|
525
|
-
69: Languages.NOT_DEFINED,
|
|
526
|
-
70: Languages.NOT_DEFINED,
|
|
527
|
-
71: Languages.NOT_DEFINED,
|
|
528
|
-
72: Languages.NOT_DEFINED,
|
|
529
|
-
73: Languages.NOT_DEFINED,
|
|
530
|
-
74: Languages.NOT_DEFINED,
|
|
531
|
-
75: Languages.NOT_DEFINED,
|
|
532
|
-
76: Languages.NOT_DEFINED,
|
|
533
|
-
77: Languages.NOT_DEFINED,
|
|
534
|
-
78: Languages.NOT_DEFINED,
|
|
535
|
-
79: Languages.NOT_DEFINED,
|
|
536
|
-
80: Languages.NOT_DEFINED,
|
|
537
|
-
81: Languages.NOT_DEFINED,
|
|
538
|
-
82: Languages.NOT_DEFINED,
|
|
539
|
-
83: Languages.NOT_DEFINED,
|
|
540
|
-
84: Languages.NOT_DEFINED,
|
|
541
|
-
85: Languages.NOT_DEFINED,
|
|
542
|
-
86: Languages.NOT_DEFINED,
|
|
543
|
-
87: Languages.NOT_DEFINED,
|
|
544
|
-
88: Languages.NOT_DEFINED,
|
|
545
|
-
89: Languages.NOT_DEFINED,
|
|
546
|
-
90: Languages.NOT_DEFINED,
|
|
547
|
-
91: Languages.NOT_DEFINED,
|
|
548
|
-
92: Languages.NOT_DEFINED,
|
|
549
|
-
93: Languages.NOT_DEFINED,
|
|
550
|
-
94: Languages.NOT_DEFINED,
|
|
551
|
-
95: Languages.NOT_DEFINED,
|
|
552
|
-
96: Languages.NOT_DEFINED,
|
|
553
|
-
97: Languages.NOT_DEFINED,
|
|
554
|
-
98: Languages.NOT_DEFINED,
|
|
555
|
-
99: Languages.NOT_DEFINED,
|
|
556
|
-
100: Languages.NOT_DEFINED,
|
|
557
|
-
101: Languages.NOT_DEFINED,
|
|
558
|
-
102: Languages.NOT_DEFINED,
|
|
559
|
-
103: Languages.NOT_DEFINED,
|
|
560
|
-
104: Languages.NOT_DEFINED,
|
|
561
|
-
105: Languages.NOT_DEFINED,
|
|
562
|
-
106: Languages.NOT_DEFINED,
|
|
563
|
-
107: Languages.NOT_DEFINED,
|
|
564
|
-
108: Languages.NOT_DEFINED,
|
|
565
|
-
109: Languages.NOT_DEFINED,
|
|
566
|
-
110: Languages.NOT_DEFINED,
|
|
567
|
-
111: Languages.NOT_DEFINED,
|
|
568
|
-
112: Languages.NOT_DEFINED,
|
|
569
|
-
113: Languages.NOT_DEFINED,
|
|
570
|
-
114: Languages.NOT_DEFINED,
|
|
571
|
-
115: Languages.NOT_DEFINED,
|
|
572
|
-
116: Languages.NOT_DEFINED,
|
|
573
|
-
117: Languages.NOT_DEFINED,
|
|
574
|
-
118: Languages.NOT_DEFINED,
|
|
575
|
-
119: Languages.NOT_DEFINED,
|
|
576
|
-
120: Languages.NOT_DEFINED,
|
|
577
|
-
121: Languages.NOT_DEFINED,
|
|
578
|
-
122: Languages.NOT_DEFINED,
|
|
579
|
-
123: Languages.NOT_DEFINED,
|
|
580
|
-
124: Languages.NOT_DEFINED,
|
|
581
|
-
125: Languages.NOT_DEFINED,
|
|
582
|
-
126: Languages.NOT_DEFINED,
|
|
583
|
-
127: Languages.NOT_DEFINED,
|
|
584
|
-
128: Languages.NOT_DEFINED,
|
|
585
|
-
129: Languages.NOT_DEFINED,
|
|
586
|
-
130: Languages.NOT_DEFINED,
|
|
587
|
-
131: Languages.NOT_DEFINED,
|
|
588
|
-
132: Languages.NOT_DEFINED,
|
|
589
|
-
133: Languages.NOT_DEFINED,
|
|
590
|
-
134: Languages.NOT_DEFINED,
|
|
591
|
-
135: Languages.NOT_DEFINED,
|
|
592
|
-
136: Languages.NOT_DEFINED,
|
|
593
|
-
137: Languages.NOT_DEFINED,
|
|
594
|
-
138: Languages.NOT_DEFINED,
|
|
595
|
-
139: Languages.NOT_DEFINED,
|
|
596
|
-
140: Languages.NOT_DEFINED,
|
|
597
|
-
141: Languages.NOT_DEFINED,
|
|
598
|
-
142: Languages.NOT_DEFINED,
|
|
599
|
-
143: Languages.NOT_DEFINED,
|
|
600
|
-
144: Languages.NOT_DEFINED,
|
|
601
|
-
145: Languages.NOT_DEFINED,
|
|
602
|
-
146: Languages.NOT_DEFINED,
|
|
603
|
-
147: Languages.NOT_DEFINED,
|
|
604
|
-
148: Languages.NOT_DEFINED,
|
|
605
|
-
149: Languages.NOT_DEFINED,
|
|
606
|
-
150: Languages.NOT_DEFINED,
|
|
607
|
-
151: Languages.NOT_DEFINED,
|
|
608
|
-
152: Languages.NOT_DEFINED,
|
|
609
|
-
153: Languages.NOT_DEFINED,
|
|
610
|
-
154: Languages.NOT_DEFINED,
|
|
611
|
-
155: Languages.NOT_DEFINED,
|
|
612
|
-
156: Languages.NOT_DEFINED,
|
|
613
|
-
157: Languages.NOT_DEFINED,
|
|
614
|
-
158: Languages.NOT_DEFINED,
|
|
615
|
-
159: Languages.NOT_DEFINED,
|
|
616
|
-
160: Languages.NOT_DEFINED,
|
|
617
|
-
161: Languages.NOT_DEFINED,
|
|
618
|
-
162: Languages.NOT_DEFINED,
|
|
619
|
-
163: Languages.NOT_DEFINED,
|
|
620
|
-
164: Languages.NOT_DEFINED,
|
|
621
|
-
165: Languages.NOT_DEFINED,
|
|
622
|
-
166: Languages.NOT_DEFINED,
|
|
623
|
-
167: Languages.NOT_DEFINED,
|
|
624
|
-
168: Languages.NOT_DEFINED,
|
|
625
|
-
169: Languages.NOT_DEFINED,
|
|
626
|
-
170: Languages.NOT_DEFINED,
|
|
627
|
-
171: Languages.NOT_DEFINED,
|
|
628
|
-
172: Languages.NOT_DEFINED,
|
|
629
|
-
173: Languages.NOT_DEFINED,
|
|
630
|
-
174: Languages.NOT_DEFINED,
|
|
631
|
-
175: Languages.NOT_DEFINED,
|
|
632
|
-
176: Languages.NOT_DEFINED,
|
|
633
|
-
},
|
|
634
|
-
categories_orig={
|
|
635
|
-
"__label__en": Languages.ENGLISH,
|
|
636
|
-
"__label__ru": Languages.RUSSIAN,
|
|
637
|
-
"__label__de": Languages.GERMAN,
|
|
638
|
-
"__label__fr": Languages.FRENCH,
|
|
639
|
-
"__label__it": Languages.ITALIAN,
|
|
640
|
-
"__label__ja": Languages.JAPANESE,
|
|
641
|
-
"__label__es": Languages.SPANISH,
|
|
642
|
-
"__label__ceb": Languages.CEBUANO,
|
|
643
|
-
"__label__tr": Languages.TURKISH,
|
|
644
|
-
"__label__pt": Languages.PORTUGUESE,
|
|
645
|
-
"__label__uk": Languages.UKRAINIAN,
|
|
646
|
-
"__label__eo": Languages.ESPERANTO,
|
|
647
|
-
"__label__pl": Languages.POLISH,
|
|
648
|
-
"__label__sv": Languages.SWEDISH,
|
|
649
|
-
"__label__nl": Languages.DUTCH,
|
|
650
|
-
"__label__he": Languages.HEBREW,
|
|
651
|
-
"__label__zh": Languages.CHINESE,
|
|
652
|
-
"__label__hu": Languages.HUNGARIAN,
|
|
653
|
-
"__label__ar": Languages.ARABIC,
|
|
654
|
-
"__label__ca": Languages.CATALAN,
|
|
655
|
-
"__label__fi": Languages.FINNISH,
|
|
656
|
-
"__label__cs": Languages.CZECH,
|
|
657
|
-
"__label__fa": Languages.PERSIAN,
|
|
658
|
-
"__label__sr": Languages.SERBIAN,
|
|
659
|
-
"__label__el": Languages.GREEK,
|
|
660
|
-
"__label__vi": Languages.VIETNAMESE,
|
|
661
|
-
"__label__bg": Languages.BULGARIAN,
|
|
662
|
-
"__label__ko": Languages.KOREAN,
|
|
663
|
-
"__label__no": Languages.NORWEGIAN,
|
|
664
|
-
"__label__mk": Languages.MACEDONIAN,
|
|
665
|
-
"__label__ro": Languages.ROMANIAN,
|
|
666
|
-
"__label__id": Languages.INDONESIAN,
|
|
667
|
-
"__label__th": Languages.THAI,
|
|
668
|
-
"__label__hy": Languages.ARMENIAN,
|
|
669
|
-
"__label__da": Languages.DANISH,
|
|
670
|
-
"__label__ta": Languages.TAMIL,
|
|
671
|
-
"__label__hi": Languages.HINDI,
|
|
672
|
-
"__label__hr": Languages.CROATIAN,
|
|
673
|
-
"__label__sh": Languages.NOT_DEFINED,
|
|
674
|
-
"__label__be": Languages.BELARUSIAN,
|
|
675
|
-
"__label__ka": Languages.GEORGIAN,
|
|
676
|
-
"__label__te": Languages.TELUGU,
|
|
677
|
-
"__label__kk": Languages.KAZAKH,
|
|
678
|
-
"__label__war": Languages.WARAY,
|
|
679
|
-
"__label__lt": Languages.LITHUANIAN,
|
|
680
|
-
"__label__gl": Languages.SCOTTISH,
|
|
681
|
-
"__label__sk": Languages.SLOVAK,
|
|
682
|
-
"__label__bn": Languages.BENIN,
|
|
683
|
-
"__label__eu": Languages.BASQUE,
|
|
684
|
-
"__label__sl": Languages.SLOVENIAN,
|
|
685
|
-
"__label__kn": Languages.NOT_DEFINED,
|
|
686
|
-
"__label__ml": Languages.MALAYALAM,
|
|
687
|
-
"__label__mr": Languages.MARATHI,
|
|
688
|
-
"__label__et": Languages.ESTONIAN,
|
|
689
|
-
"__label__az": Languages.AZERBAIJANI,
|
|
690
|
-
"__label__ms": Languages.NOT_DEFINED,
|
|
691
|
-
"__label__sq": Languages.ALBANIAN,
|
|
692
|
-
"__label__la": Languages.LATIN,
|
|
693
|
-
"__label__bs": Languages.BOSNIAN,
|
|
694
|
-
"__label__nn": Languages.NORWEGIAN_NOVOSIBIRSK,
|
|
695
|
-
"__label__ur": Languages.URDU,
|
|
696
|
-
"__label__lv": Languages.NOT_DEFINED,
|
|
697
|
-
"__label__my": Languages.NOT_DEFINED,
|
|
698
|
-
"__label__tt": Languages.NOT_DEFINED,
|
|
699
|
-
"__label__af": Languages.NOT_DEFINED,
|
|
700
|
-
"__label__oc": Languages.NOT_DEFINED,
|
|
701
|
-
"__label__nds": Languages.NOT_DEFINED,
|
|
702
|
-
"__label__ky": Languages.NOT_DEFINED,
|
|
703
|
-
"__label__ast": Languages.NOT_DEFINED,
|
|
704
|
-
"__label__tl": Languages.NOT_DEFINED,
|
|
705
|
-
"__label__is": Languages.NOT_DEFINED,
|
|
706
|
-
"__label__ia": Languages.NOT_DEFINED,
|
|
707
|
-
"__label__si": Languages.NOT_DEFINED,
|
|
708
|
-
"__label__gu": Languages.NOT_DEFINED,
|
|
709
|
-
"__label__km": Languages.NOT_DEFINED,
|
|
710
|
-
"__label__br": Languages.NOT_DEFINED,
|
|
711
|
-
"__label__ba": Languages.NOT_DEFINED,
|
|
712
|
-
"__label__uz": Languages.NOT_DEFINED,
|
|
713
|
-
"__label__bo": Languages.NOT_DEFINED,
|
|
714
|
-
"__label__pa": Languages.NOT_DEFINED,
|
|
715
|
-
"__label__vo": Languages.NOT_DEFINED,
|
|
716
|
-
"__label__als": Languages.NOT_DEFINED,
|
|
717
|
-
"__label__ne": Languages.NOT_DEFINED,
|
|
718
|
-
"__label__cy": Languages.NOT_DEFINED,
|
|
719
|
-
"__label__jbo": Languages.NOT_DEFINED,
|
|
720
|
-
"__label__fy": Languages.NOT_DEFINED,
|
|
721
|
-
"__label__mn": Languages.NOT_DEFINED,
|
|
722
|
-
"__label__lb": Languages.NOT_DEFINED,
|
|
723
|
-
"__label__ce": Languages.NOT_DEFINED,
|
|
724
|
-
"__label__ug": Languages.NOT_DEFINED,
|
|
725
|
-
"__label__tg": Languages.NOT_DEFINED,
|
|
726
|
-
"__label__sco": Languages.NOT_DEFINED,
|
|
727
|
-
"__label__sa": Languages.NOT_DEFINED,
|
|
728
|
-
"__label__cv": Languages.NOT_DEFINED,
|
|
729
|
-
"__label__jv": Languages.NOT_DEFINED,
|
|
730
|
-
"__label__min": Languages.NOT_DEFINED,
|
|
731
|
-
"__label__io": Languages.NOT_DEFINED,
|
|
732
|
-
"__label__or": Languages.NOT_DEFINED,
|
|
733
|
-
"__label__as": Languages.NOT_DEFINED,
|
|
734
|
-
"__label__new": Languages.NOT_DEFINED,
|
|
735
|
-
"__label__ga": Languages.NOT_DEFINED,
|
|
736
|
-
"__label__mg": Languages.NOT_DEFINED,
|
|
737
|
-
"__label__an": Languages.NOT_DEFINED,
|
|
738
|
-
"__label__ckb": Languages.NOT_DEFINED,
|
|
739
|
-
"__label__sw": Languages.NOT_DEFINED,
|
|
740
|
-
"__label__bar": Languages.NOT_DEFINED,
|
|
741
|
-
"__label__lmo": Languages.NOT_DEFINED,
|
|
742
|
-
"__label__yi": Languages.NOT_DEFINED,
|
|
743
|
-
"__label__arz": Languages.NOT_DEFINED,
|
|
744
|
-
"__label__mhr": Languages.NOT_DEFINED,
|
|
745
|
-
"__label__azb": Languages.NOT_DEFINED,
|
|
746
|
-
"__label__sah": Languages.NOT_DEFINED,
|
|
747
|
-
"__label__pnb": Languages.NOT_DEFINED,
|
|
748
|
-
"__label__su": Languages.NOT_DEFINED,
|
|
749
|
-
"__label__bpy": Languages.NOT_DEFINED,
|
|
750
|
-
"__label__pms": Languages.NOT_DEFINED,
|
|
751
|
-
"__label__ilo": Languages.NOT_DEFINED,
|
|
752
|
-
"__label__wuu": Languages.NOT_DEFINED,
|
|
753
|
-
"__label__ku": Languages.NOT_DEFINED,
|
|
754
|
-
"__label__ps": Languages.NOT_DEFINED,
|
|
755
|
-
"__label__ie": Languages.NOT_DEFINED,
|
|
756
|
-
"__label__xmf": Languages.NOT_DEFINED,
|
|
757
|
-
"__label__yue": Languages.NOT_DEFINED,
|
|
758
|
-
"__label__gom": Languages.NOT_DEFINED,
|
|
759
|
-
"__label__li": Languages.NOT_DEFINED,
|
|
760
|
-
"__label__mwl": Languages.NOT_DEFINED,
|
|
761
|
-
"__label__kw": Languages.NOT_DEFINED,
|
|
762
|
-
"__label__sd": Languages.NOT_DEFINED,
|
|
763
|
-
"__label__hsb": Languages.NOT_DEFINED,
|
|
764
|
-
"__label__scn": Languages.NOT_DEFINED,
|
|
765
|
-
"__label__gd": Languages.NOT_DEFINED,
|
|
766
|
-
"__label__pam": Languages.NOT_DEFINED,
|
|
767
|
-
"__label__bh": Languages.NOT_DEFINED,
|
|
768
|
-
"__label__mai": Languages.NOT_DEFINED,
|
|
769
|
-
"__label__vec": Languages.NOT_DEFINED,
|
|
770
|
-
"__label__mt": Languages.NOT_DEFINED,
|
|
771
|
-
"__label__dv": Languages.NOT_DEFINED,
|
|
772
|
-
"__label__wa": Languages.NOT_DEFINED,
|
|
773
|
-
"__label__mzn": Languages.NOT_DEFINED,
|
|
774
|
-
"__label__am": Languages.NOT_DEFINED,
|
|
775
|
-
"__label__qu": Languages.NOT_DEFINED,
|
|
776
|
-
"__label__eml": Languages.NOT_DEFINED,
|
|
777
|
-
"__label__cbk": Languages.NOT_DEFINED,
|
|
778
|
-
"__label__tk": Languages.NOT_DEFINED,
|
|
779
|
-
"__label__rm": Languages.NOT_DEFINED,
|
|
780
|
-
"__label__os": Languages.NOT_DEFINED,
|
|
781
|
-
"__label__vls": Languages.NOT_DEFINED,
|
|
782
|
-
"__label__yo": Languages.NOT_DEFINED,
|
|
783
|
-
"__label__lo": Languages.NOT_DEFINED,
|
|
784
|
-
"__label__lez": Languages.NOT_DEFINED,
|
|
785
|
-
"__label__so": Languages.NOT_DEFINED,
|
|
786
|
-
"__label__myv": Languages.NOT_DEFINED,
|
|
787
|
-
"__label__diq": Languages.NOT_DEFINED,
|
|
788
|
-
"__label__mrj": Languages.NOT_DEFINED,
|
|
789
|
-
"__label__dsb": Languages.NOT_DEFINED,
|
|
790
|
-
"__label__frr": Languages.NOT_DEFINED,
|
|
791
|
-
"__label__ht": Languages.NOT_DEFINED,
|
|
792
|
-
"__label__gn": Languages.NOT_DEFINED,
|
|
793
|
-
"__label__bxr": Languages.NOT_DEFINED,
|
|
794
|
-
"__label__kv": Languages.NOT_DEFINED,
|
|
795
|
-
"__label__sc": Languages.NOT_DEFINED,
|
|
796
|
-
"__label__nah": Languages.NOT_DEFINED,
|
|
797
|
-
"__label__krc": Languages.NOT_DEFINED,
|
|
798
|
-
"__label__bcl": Languages.NOT_DEFINED,
|
|
799
|
-
"__label__nap": Languages.NOT_DEFINED,
|
|
800
|
-
"__label__gv": Languages.NOT_DEFINED,
|
|
801
|
-
"__label__av": Languages.NOT_DEFINED,
|
|
802
|
-
"__label__rue": Languages.NOT_DEFINED,
|
|
803
|
-
"__label__xal": Languages.NOT_DEFINED,
|
|
804
|
-
"__label__pfl": Languages.NOT_DEFINED,
|
|
805
|
-
"__label__dty": Languages.NOT_DEFINED,
|
|
806
|
-
"__label__hif": Languages.NOT_DEFINED,
|
|
807
|
-
"__label__co": Languages.NOT_DEFINED,
|
|
808
|
-
"__label__lrc": Languages.NOT_DEFINED,
|
|
809
|
-
"__label__vep": Languages.NOT_DEFINED,
|
|
810
|
-
"__label__tyv": Languages.NOT_DEFINED,
|
|
811
|
-
},
|
|
812
|
-
model_wrapper="FasttextLangDetector",
|
|
813
|
-
),
|
|
814
|
-
}
|
|
118
|
+
CATALOG: dict[str, ModelProfile] = {}
|
|
815
119
|
|
|
816
120
|
@staticmethod
|
|
817
121
|
def get_full_path_weights(name: PathLikeOrStr) -> PathLikeOrStr:
|
|
818
122
|
"""
|
|
819
123
|
Returns the absolute path of weights.
|
|
820
124
|
|
|
821
|
-
Note
|
|
822
|
-
|
|
125
|
+
Note:
|
|
126
|
+
Weights are sometimes not defined by only one artifact. The returned string will only represent one
|
|
127
|
+
weights artifact.
|
|
128
|
+
|
|
129
|
+
Args:
|
|
130
|
+
name: model name
|
|
823
131
|
|
|
824
|
-
:
|
|
825
|
-
|
|
132
|
+
Returns:
|
|
133
|
+
absolute weight path
|
|
826
134
|
"""
|
|
827
135
|
try:
|
|
828
136
|
profile = ModelCatalog.get_profile(os.fspath(name))
|
|
@@ -847,14 +155,18 @@ class ModelCatalog:
|
|
|
847
155
|
@staticmethod
|
|
848
156
|
def get_full_path_configs(name: PathLikeOrStr) -> PathLikeOrStr:
|
|
849
157
|
"""
|
|
850
|
-
|
|
158
|
+
Absolute path of configs for some given weights. Alternatively, pass a path to a config file
|
|
851
159
|
(without the base path to the cache config directory).
|
|
852
160
|
|
|
853
|
-
Note
|
|
854
|
-
|
|
161
|
+
Note:
|
|
162
|
+
Configs are sometimes not defined by only one file. The returned string will only represent one
|
|
163
|
+
file.
|
|
855
164
|
|
|
856
|
-
:
|
|
857
|
-
|
|
165
|
+
Args:
|
|
166
|
+
name: model name
|
|
167
|
+
|
|
168
|
+
Returns:
|
|
169
|
+
Absolute path to the config
|
|
858
170
|
"""
|
|
859
171
|
try:
|
|
860
172
|
profile = ModelCatalog.get_profile(os.fspath(name))
|
|
@@ -876,8 +188,11 @@ class ModelCatalog:
|
|
|
876
188
|
Return the absolute path of preprocessor configs for some given weights. Preprocessors are occasionally provided
|
|
877
189
|
by the transformer library.
|
|
878
190
|
|
|
879
|
-
:
|
|
880
|
-
|
|
191
|
+
Args:
|
|
192
|
+
name: model name
|
|
193
|
+
|
|
194
|
+
Returns:
|
|
195
|
+
Absolute path to the preprocessor config
|
|
881
196
|
"""
|
|
882
197
|
|
|
883
198
|
try:
|
|
@@ -897,14 +212,16 @@ class ModelCatalog:
|
|
|
897
212
|
@staticmethod
|
|
898
213
|
def get_model_list() -> list[PathLikeOrStr]:
|
|
899
214
|
"""
|
|
900
|
-
Returns
|
|
215
|
+
Returns:
|
|
216
|
+
A list of absolute paths of registered models.
|
|
901
217
|
"""
|
|
902
218
|
return [os.path.join(get_weights_dir_path(), profile.name) for profile in ModelCatalog.CATALOG.values()]
|
|
903
219
|
|
|
904
220
|
@staticmethod
|
|
905
221
|
def get_profile_list() -> list[str]:
|
|
906
222
|
"""
|
|
907
|
-
Returns
|
|
223
|
+
Returns:
|
|
224
|
+
A list of profile keys.
|
|
908
225
|
"""
|
|
909
226
|
return list(ModelCatalog.CATALOG.keys())
|
|
910
227
|
|
|
@@ -913,8 +230,11 @@ class ModelCatalog:
|
|
|
913
230
|
"""
|
|
914
231
|
Checks if some weights belong to a registered model
|
|
915
232
|
|
|
916
|
-
:
|
|
917
|
-
|
|
233
|
+
Args:
|
|
234
|
+
path_weights: relative or absolute path
|
|
235
|
+
|
|
236
|
+
Returns:
|
|
237
|
+
`True` if the weights are registered in `ModelCatalog`
|
|
918
238
|
"""
|
|
919
239
|
if (ModelCatalog.get_full_path_weights(path_weights) in ModelCatalog.get_model_list()) or (
|
|
920
240
|
path_weights in ModelCatalog.get_model_list()
|
|
@@ -927,8 +247,11 @@ class ModelCatalog:
|
|
|
927
247
|
"""
|
|
928
248
|
Returns the profile of given model name, i.e. the config file, size and urls.
|
|
929
249
|
|
|
930
|
-
:
|
|
931
|
-
|
|
250
|
+
Args:
|
|
251
|
+
name: model name
|
|
252
|
+
|
|
253
|
+
Returns:
|
|
254
|
+
A dict of model/weights profiles
|
|
932
255
|
"""
|
|
933
256
|
|
|
934
257
|
profile = ModelCatalog.CATALOG.get(name)
|
|
@@ -941,9 +264,10 @@ class ModelCatalog:
|
|
|
941
264
|
"""
|
|
942
265
|
Register a model with its profile
|
|
943
266
|
|
|
944
|
-
:
|
|
945
|
-
|
|
946
|
-
|
|
267
|
+
Args:
|
|
268
|
+
name: Name of the model. We use the file name of the model along with its path (starting from the
|
|
269
|
+
weights `.cache`), e.g. `my_model/model_123.pkl`.
|
|
270
|
+
profile: profile of the model
|
|
947
271
|
"""
|
|
948
272
|
if name in ModelCatalog.CATALOG:
|
|
949
273
|
raise KeyError("Model already registered")
|
|
@@ -952,9 +276,10 @@ class ModelCatalog:
|
|
|
952
276
|
@staticmethod
|
|
953
277
|
def load_profiles_from_file(path: Optional[PathLikeOrStr] = None) -> None:
|
|
954
278
|
"""
|
|
955
|
-
Load model profiles from a jsonl file and extend `CATALOG` with the new profiles.
|
|
279
|
+
Load model profiles from a `jsonl` file and extend `CATALOG` with the new profiles.
|
|
956
280
|
|
|
957
|
-
:
|
|
281
|
+
Args:
|
|
282
|
+
path: Path to the file. `None` is allowed but will do nothing.
|
|
958
283
|
"""
|
|
959
284
|
if not path:
|
|
960
285
|
return
|
|
@@ -968,9 +293,10 @@ class ModelCatalog:
|
|
|
968
293
|
@staticmethod
|
|
969
294
|
def save_profiles_to_file(target_path: PathLikeOrStr) -> None:
|
|
970
295
|
"""
|
|
971
|
-
Save model profiles to a jsonl file.
|
|
296
|
+
Save model profiles to a `jsonl` file.
|
|
972
297
|
|
|
973
|
-
:
|
|
298
|
+
Args:
|
|
299
|
+
target_path: Path to the file.
|
|
974
300
|
"""
|
|
975
301
|
with jsonlines.open(target_path, mode="w") as writer:
|
|
976
302
|
for profile in ModelCatalog.CATALOG.values():
|
|
@@ -978,6 +304,11 @@ class ModelCatalog:
|
|
|
978
304
|
writer.close()
|
|
979
305
|
|
|
980
306
|
|
|
307
|
+
# Loading default profiles
|
|
308
|
+
dd_profile_path = maybe_copy_config_to_cache(
|
|
309
|
+
get_package_path(), get_cache_dir_path(), "deepdoctection/configs/profiles.jsonl", False
|
|
310
|
+
)
|
|
311
|
+
ModelCatalog.load_profiles_from_file(dd_profile_path)
|
|
981
312
|
# Additional profiles can be added
|
|
982
313
|
ModelCatalog.load_profiles_from_file(os.environ.get("MODEL_CATALOG", None))
|
|
983
314
|
|
|
@@ -986,8 +317,11 @@ def get_tp_weight_names(name: str) -> list[str]:
|
|
|
986
317
|
"""
|
|
987
318
|
Given a path to some model weights it will return all file names according to TP naming convention
|
|
988
319
|
|
|
989
|
-
:
|
|
990
|
-
|
|
320
|
+
Args:
|
|
321
|
+
name: TP model name
|
|
322
|
+
|
|
323
|
+
Returns:
|
|
324
|
+
A list of TP file names
|
|
991
325
|
"""
|
|
992
326
|
_, file_name = os.path.split(name)
|
|
993
327
|
prefix, _ = file_name.split(".")
|
|
@@ -1002,6 +336,11 @@ def print_model_infos(add_description: bool = True, add_config: bool = True, add
|
|
|
1002
336
|
"""
|
|
1003
337
|
Prints a table with all registered model profiles and some of their attributes (name, description, config and
|
|
1004
338
|
categories)
|
|
339
|
+
|
|
340
|
+
Args:
|
|
341
|
+
add_description: If `True`, the description of the model will be printed
|
|
342
|
+
add_config: If `True`, the config of the model will be printed
|
|
343
|
+
add_categories: If `True`, the categories of the model will be printed
|
|
1005
344
|
"""
|
|
1006
345
|
|
|
1007
346
|
profiles = ModelCatalog.CATALOG.values()
|
|
@@ -1038,8 +377,11 @@ class ModelDownloadManager:
|
|
|
1038
377
|
Class for organizing downloads of config files and weights from various sources. Internally, it will use model
|
|
1039
378
|
profiles to know where things are stored.
|
|
1040
379
|
|
|
380
|
+
Example:
|
|
381
|
+
```python
|
|
1041
382
|
# if you are not sure about the model name use the ModelCatalog
|
|
1042
383
|
ModelDownloadManager.maybe_download_weights_and_configs("layout/model-800000_inf_only.data-00000-of-00001")
|
|
384
|
+
```
|
|
1043
385
|
"""
|
|
1044
386
|
|
|
1045
387
|
@staticmethod
|
|
@@ -1048,8 +390,10 @@ class ModelDownloadManager:
|
|
|
1048
390
|
Check if some model is registered. If yes, it will check if their weights
|
|
1049
391
|
must be downloaded. Only weights whose size does not match the expected size will be downloaded again.
|
|
1050
392
|
|
|
1051
|
-
:
|
|
1052
|
-
|
|
393
|
+
Args:
|
|
394
|
+
name: A path to some model weights
|
|
395
|
+
Returns:
|
|
396
|
+
Absolute path to model weights, if model is registered
|
|
1053
397
|
"""
|
|
1054
398
|
|
|
1055
399
|
absolute_path_weights = ModelCatalog.get_full_path_weights(name)
|
|
@@ -1091,10 +435,11 @@ class ModelDownloadManager:
|
|
|
1091
435
|
Load a model from the Huggingface hub for a given profile and save the model at the directory of the given
|
|
1092
436
|
path.
|
|
1093
437
|
|
|
1094
|
-
:
|
|
1095
|
-
|
|
1096
|
-
|
|
1097
|
-
|
|
438
|
+
Args:
|
|
439
|
+
profile: Profile according to `ModelCatalog.get_profile(path_weights)`
|
|
440
|
+
absolute_path: Absolute path (incl. file name) of target file
|
|
441
|
+
file_names: Optionally, replace the file name of the `ModelCatalog`. This is necessary e.g. for Tensorpack
|
|
442
|
+
models
|
|
1098
443
|
"""
|
|
1099
444
|
repo_id = profile.hf_repo_id
|
|
1100
445
|
if repo_id is None:
|
|
@@ -1125,15 +470,16 @@ class ModelDownloadManager:
|
|
|
1125
470
|
Load config file(s) from the Huggingface hub for a given profile and save them at the directory of the
|
|
1126
471
|
given path.
|
|
1127
472
|
|
|
1128
|
-
:
|
|
1129
|
-
|
|
473
|
+
Args:
|
|
474
|
+
profile: Profile according to `ModelCatalog.get_profile(path_weights)`
|
|
475
|
+
absolute_path: Absolute path (incl. file name) of target file
|
|
1130
476
|
"""
|
|
1131
477
|
|
|
1132
478
|
repo_id = profile.hf_repo_id
|
|
1133
479
|
if repo_id is None:
|
|
1134
480
|
raise ValueError("hf_repo_id cannot be None")
|
|
1135
481
|
directory, _ = os.path.split(absolute_path)
|
|
1136
|
-
if
|
|
482
|
+
if profile.hf_config_file is None:
|
|
1137
483
|
raise ValueError("hf_config_file cannot be None")
|
|
1138
484
|
for file_name in profile.hf_config_file:
|
|
1139
485
|
ModelDownloadManager._load_from_hf_hub(repo_id, file_name, directory)
|