deepdoctection 0.42.0__py3-none-any.whl → 0.43__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of deepdoctection might be problematic. Click here for more details.

Files changed (124)
  1. deepdoctection/__init__.py +2 -1
  2. deepdoctection/analyzer/__init__.py +2 -1
  3. deepdoctection/analyzer/config.py +904 -0
  4. deepdoctection/analyzer/dd.py +36 -62
  5. deepdoctection/analyzer/factory.py +311 -141
  6. deepdoctection/configs/conf_dd_one.yaml +100 -44
  7. deepdoctection/configs/profiles.jsonl +32 -0
  8. deepdoctection/dataflow/__init__.py +9 -6
  9. deepdoctection/dataflow/base.py +33 -15
  10. deepdoctection/dataflow/common.py +96 -75
  11. deepdoctection/dataflow/custom.py +36 -29
  12. deepdoctection/dataflow/custom_serialize.py +135 -91
  13. deepdoctection/dataflow/parallel_map.py +33 -31
  14. deepdoctection/dataflow/serialize.py +15 -10
  15. deepdoctection/dataflow/stats.py +41 -28
  16. deepdoctection/datapoint/__init__.py +4 -6
  17. deepdoctection/datapoint/annotation.py +104 -66
  18. deepdoctection/datapoint/box.py +190 -130
  19. deepdoctection/datapoint/convert.py +66 -39
  20. deepdoctection/datapoint/image.py +151 -95
  21. deepdoctection/datapoint/view.py +383 -236
  22. deepdoctection/datasets/__init__.py +2 -6
  23. deepdoctection/datasets/adapter.py +11 -11
  24. deepdoctection/datasets/base.py +118 -81
  25. deepdoctection/datasets/dataflow_builder.py +18 -12
  26. deepdoctection/datasets/info.py +76 -57
  27. deepdoctection/datasets/instances/__init__.py +6 -2
  28. deepdoctection/datasets/instances/doclaynet.py +17 -14
  29. deepdoctection/datasets/instances/fintabnet.py +16 -22
  30. deepdoctection/datasets/instances/funsd.py +11 -6
  31. deepdoctection/datasets/instances/iiitar13k.py +9 -9
  32. deepdoctection/datasets/instances/layouttest.py +9 -9
  33. deepdoctection/datasets/instances/publaynet.py +9 -9
  34. deepdoctection/datasets/instances/pubtables1m.py +13 -13
  35. deepdoctection/datasets/instances/pubtabnet.py +13 -15
  36. deepdoctection/datasets/instances/rvlcdip.py +8 -8
  37. deepdoctection/datasets/instances/xfund.py +11 -9
  38. deepdoctection/datasets/registry.py +18 -11
  39. deepdoctection/datasets/save.py +12 -11
  40. deepdoctection/eval/__init__.py +3 -2
  41. deepdoctection/eval/accmetric.py +72 -52
  42. deepdoctection/eval/base.py +29 -10
  43. deepdoctection/eval/cocometric.py +14 -12
  44. deepdoctection/eval/eval.py +56 -41
  45. deepdoctection/eval/registry.py +6 -3
  46. deepdoctection/eval/tedsmetric.py +24 -9
  47. deepdoctection/eval/tp_eval_callback.py +13 -12
  48. deepdoctection/extern/__init__.py +1 -1
  49. deepdoctection/extern/base.py +176 -97
  50. deepdoctection/extern/d2detect.py +127 -92
  51. deepdoctection/extern/deskew.py +19 -10
  52. deepdoctection/extern/doctrocr.py +157 -106
  53. deepdoctection/extern/fastlang.py +25 -17
  54. deepdoctection/extern/hfdetr.py +137 -60
  55. deepdoctection/extern/hflayoutlm.py +329 -248
  56. deepdoctection/extern/hflm.py +67 -33
  57. deepdoctection/extern/model.py +108 -762
  58. deepdoctection/extern/pdftext.py +37 -12
  59. deepdoctection/extern/pt/nms.py +15 -1
  60. deepdoctection/extern/pt/ptutils.py +13 -9
  61. deepdoctection/extern/tessocr.py +87 -54
  62. deepdoctection/extern/texocr.py +29 -14
  63. deepdoctection/extern/tp/tfutils.py +36 -8
  64. deepdoctection/extern/tp/tpcompat.py +54 -16
  65. deepdoctection/extern/tp/tpfrcnn/config/config.py +20 -4
  66. deepdoctection/extern/tpdetect.py +4 -2
  67. deepdoctection/mapper/__init__.py +1 -1
  68. deepdoctection/mapper/cats.py +117 -76
  69. deepdoctection/mapper/cocostruct.py +35 -17
  70. deepdoctection/mapper/d2struct.py +56 -29
  71. deepdoctection/mapper/hfstruct.py +32 -19
  72. deepdoctection/mapper/laylmstruct.py +221 -185
  73. deepdoctection/mapper/maputils.py +71 -35
  74. deepdoctection/mapper/match.py +76 -62
  75. deepdoctection/mapper/misc.py +68 -44
  76. deepdoctection/mapper/pascalstruct.py +13 -12
  77. deepdoctection/mapper/prodigystruct.py +33 -19
  78. deepdoctection/mapper/pubstruct.py +42 -32
  79. deepdoctection/mapper/tpstruct.py +39 -19
  80. deepdoctection/mapper/xfundstruct.py +20 -13
  81. deepdoctection/pipe/__init__.py +1 -2
  82. deepdoctection/pipe/anngen.py +104 -62
  83. deepdoctection/pipe/base.py +226 -107
  84. deepdoctection/pipe/common.py +206 -123
  85. deepdoctection/pipe/concurrency.py +74 -47
  86. deepdoctection/pipe/doctectionpipe.py +108 -47
  87. deepdoctection/pipe/language.py +41 -24
  88. deepdoctection/pipe/layout.py +45 -18
  89. deepdoctection/pipe/lm.py +146 -78
  90. deepdoctection/pipe/order.py +196 -113
  91. deepdoctection/pipe/refine.py +111 -63
  92. deepdoctection/pipe/registry.py +1 -1
  93. deepdoctection/pipe/segment.py +213 -142
  94. deepdoctection/pipe/sub_layout.py +76 -46
  95. deepdoctection/pipe/text.py +52 -33
  96. deepdoctection/pipe/transform.py +8 -6
  97. deepdoctection/train/d2_frcnn_train.py +87 -69
  98. deepdoctection/train/hf_detr_train.py +72 -40
  99. deepdoctection/train/hf_layoutlm_train.py +85 -46
  100. deepdoctection/train/tp_frcnn_train.py +56 -28
  101. deepdoctection/utils/concurrency.py +59 -16
  102. deepdoctection/utils/context.py +40 -19
  103. deepdoctection/utils/develop.py +25 -17
  104. deepdoctection/utils/env_info.py +85 -36
  105. deepdoctection/utils/error.py +16 -10
  106. deepdoctection/utils/file_utils.py +246 -62
  107. deepdoctection/utils/fs.py +162 -43
  108. deepdoctection/utils/identifier.py +29 -16
  109. deepdoctection/utils/logger.py +49 -32
  110. deepdoctection/utils/metacfg.py +83 -21
  111. deepdoctection/utils/pdf_utils.py +119 -62
  112. deepdoctection/utils/settings.py +24 -10
  113. deepdoctection/utils/tqdm.py +10 -5
  114. deepdoctection/utils/transform.py +182 -46
  115. deepdoctection/utils/utils.py +61 -28
  116. deepdoctection/utils/viz.py +150 -104
  117. deepdoctection-0.43.dist-info/METADATA +376 -0
  118. deepdoctection-0.43.dist-info/RECORD +149 -0
  119. {deepdoctection-0.42.0.dist-info → deepdoctection-0.43.dist-info}/WHEEL +1 -1
  120. deepdoctection/analyzer/_config.py +0 -146
  121. deepdoctection-0.42.0.dist-info/METADATA +0 -431
  122. deepdoctection-0.42.0.dist-info/RECORD +0 -148
  123. {deepdoctection-0.42.0.dist-info → deepdoctection-0.43.dist-info}/licenses/LICENSE +0 -0
  124. {deepdoctection-0.42.0.dist-info → deepdoctection-0.43.dist-info}/top_level.txt +0 -0
@@ -16,7 +16,7 @@
16
16
  # limitations under the License.
17
17
 
18
18
  """
19
- Module for ModelCatalog and ModelDownloadManager
19
+ `ModelCatalog` and `ModelDownloadManager`
20
20
  """
21
21
 
22
22
  import os
@@ -28,9 +28,16 @@ from huggingface_hub import hf_hub_download
28
28
  from tabulate import tabulate
29
29
  from termcolor import colored
30
30
 
31
- from ..utils.fs import download, get_configs_dir_path, get_weights_dir_path
31
+ from ..utils.fs import (
32
+ download,
33
+ get_cache_dir_path,
34
+ get_configs_dir_path,
35
+ get_package_path,
36
+ get_weights_dir_path,
37
+ maybe_copy_config_to_cache,
38
+ )
32
39
  from ..utils.logger import LoggingRecord, log_once, logger
33
- from ..utils.settings import CellType, Languages, LayoutType, ObjectTypes, get_type
40
+ from ..utils.settings import ObjectTypes, get_type
34
41
  from ..utils.types import PathLikeOrStr
35
42
 
36
43
  __all__ = ["ModelCatalog", "ModelDownloadManager", "print_model_infos", "ModelProfile"]
@@ -39,7 +46,7 @@ __all__ = ["ModelCatalog", "ModelDownloadManager", "print_model_infos", "ModelPr
39
46
  @dataclass(frozen=True)
40
47
  class ModelProfile:
41
48
  """
42
- Class for model profile. Add for each model one ModelProfile to the ModelCatalog
49
+ Class for model profile. Add for each model one `ModelProfile` to the `ModelCatalog`
43
50
  """
44
51
 
45
52
  name: str
@@ -58,10 +65,12 @@ class ModelProfile:
58
65
  dl_library: Optional[str] = field(default=None)
59
66
  model_wrapper: Optional[str] = field(default=None)
60
67
  architecture: Optional[str] = field(default=None)
68
+ padding: Optional[bool] = field(default=None)
61
69
 
62
70
  def as_dict(self) -> dict[str, Any]:
63
71
  """
64
- returns a dict of the dataclass
72
+ Returns:
73
+ A dict of the dataclass
65
74
  """
66
75
  return asdict(self)
67
76
 
@@ -72,757 +81,56 @@ class ModelCatalog:
72
81
 
73
82
  To get an overview of all registered models
74
83
 
84
+ Example:
85
+ ```python
75
86
  print(ModelCatalog.get_model_list())
87
+ ```
76
88
 
77
89
  To get a model card for some specific model:
78
90
 
91
+ Example:
92
+ ```python
79
93
  profile = ModelCatalog.get_profile("layout/model-800000_inf_only.data-00000-of-00001")
80
94
  print(profile.description)
95
+ ```
81
96
 
82
97
  Some models will have their weights and configs stored in the cache. To instantiate predictors one will sometimes
83
98
  need their path. Use
84
99
 
100
+ Example:
101
+ ```python
85
102
  path_configs = ModelCatalog.get_full_path_configs("layout/model-800000_inf_only.data-00000-of-00001")
86
103
  path_weights = ModelCatalog.get_full_path_weights("layout/model-800000_inf_only.data-00000-of-00001")
104
+ ```
87
105
 
88
106
  To register a new model
89
107
 
108
+ Example:
109
+ ```python
90
110
  ModelCatalog.get_full_path_configs("my_new_model")
111
+ ```
112
+
113
+ Attributes:
114
+ CATALOG (dict[str, ModelProfile]): A dict of model profiles. The key is the model name and the value is a
115
+ `ModelProfile` object.
91
116
  """
92
117
 
93
- CATALOG: dict[str, ModelProfile] = {
94
- "layout/model-800000_inf_only.data-00000-of-00001": ModelProfile(
95
- name="layout/model-800000_inf_only.data-00000-of-00001",
96
- description="Tensorpack layout model for inference purposes trained on Publaynet",
97
- config="dd/tp/conf_frcnn_layout.yaml",
98
- size=[274552244, 7907],
99
- tp_model=True,
100
- hf_repo_id="deepdoctection/tp_casc_rcnn_X_32xd4_50_FPN_GN_2FC_publaynet_inference_only",
101
- hf_model_name="model-800000_inf_only",
102
- hf_config_file=["conf_frcnn_layout.yaml"],
103
- categories={
104
- 1: LayoutType.TEXT,
105
- 2: LayoutType.TITLE,
106
- 3: LayoutType.LIST,
107
- 4: LayoutType.TABLE,
108
- 5: LayoutType.FIGURE,
109
- },
110
- dl_library="TF",
111
- model_wrapper="TPFrcnnDetector",
112
- ),
113
- "cell/model-1800000_inf_only.data-00000-of-00001": ModelProfile(
114
- name="cell/model-1800000_inf_only.data-00000-of-00001",
115
- description="Tensorpack cell detection model for inference purposes trained on Pubtabnet",
116
- config="dd/tp/conf_frcnn_cell.yaml",
117
- size=[274503056, 8056],
118
- tp_model=True,
119
- hf_repo_id="deepdoctection/tp_casc_rcnn_X_32xd4_50_FPN_GN_2FC_pubtabnet_c_inference_only",
120
- hf_model_name="model-1800000_inf_only",
121
- hf_config_file=["conf_frcnn_cell.yaml"],
122
- categories={1: LayoutType.CELL},
123
- dl_library="TF",
124
- model_wrapper="TPFrcnnDetector",
125
- ),
126
- "item/model-1620000_inf_only.data-00000-of-00001": ModelProfile(
127
- name="item/model-1620000_inf_only.data-00000-of-00001",
128
- description="Tensorpack row/column detection model for inference purposes trained on Pubtabnet",
129
- config="dd/tp/conf_frcnn_rows.yaml",
130
- size=[274515344, 7904],
131
- tp_model=True,
132
- hf_repo_id="deepdoctection/tp_casc_rcnn_X_32xd4_50_FPN_GN_2FC_pubtabnet_rc_inference_only",
133
- hf_model_name="model-1620000_inf_only",
134
- hf_config_file=["conf_frcnn_rows.yaml"],
135
- categories={1: LayoutType.ROW, 2: LayoutType.COLUMN},
136
- dl_library="TF",
137
- model_wrapper="TPFrcnnDetector",
138
- ),
139
- "layout/d2_model_0829999_layout_inf_only.pt": ModelProfile(
140
- name="layout/d2_model_0829999_layout_inf_only.pt",
141
- description="Detectron2 layout detection model trained on Publaynet",
142
- config="dd/d2/layout/CASCADE_RCNN_R_50_FPN_GN.yaml",
143
- size=[274632215],
144
- tp_model=False,
145
- hf_repo_id="deepdoctection/d2_casc_rcnn_X_32xd4_50_FPN_GN_2FC_publaynet_inference_only",
146
- hf_model_name="d2_model_0829999_layout_inf_only.pt",
147
- hf_config_file=["Base-RCNN-FPN.yaml", "CASCADE_RCNN_R_50_FPN_GN.yaml"],
148
- categories={
149
- 1: LayoutType.TEXT,
150
- 2: LayoutType.TITLE,
151
- 3: LayoutType.LIST,
152
- 4: LayoutType.TABLE,
153
- 5: LayoutType.FIGURE,
154
- },
155
- dl_library="PT",
156
- model_wrapper="D2FrcnnDetector",
157
- ),
158
- "layout/d2_model_0829999_layout_inf_only.ts": ModelProfile(
159
- name="layout/d2_model_0829999_layout_inf_only.ts",
160
- description="Detectron2 layout detection model trained on Publaynet. Torchscript export",
161
- config="dd/d2/layout/CASCADE_RCNN_R_50_FPN_GN_TS.yaml",
162
- size=[274974842],
163
- tp_model=False,
164
- hf_repo_id="deepdoctection/d2_casc_rcnn_X_32xd4_50_FPN_GN_2FC_publaynet_inference_only",
165
- hf_model_name="d2_model_0829999_layout_inf_only.ts",
166
- hf_config_file=["CASCADE_RCNN_R_50_FPN_GN_TS.yaml"],
167
- categories={
168
- 1: LayoutType.TEXT,
169
- 2: LayoutType.TITLE,
170
- 3: LayoutType.LIST,
171
- 4: LayoutType.TABLE,
172
- 5: LayoutType.FIGURE,
173
- },
174
- dl_library="PT",
175
- model_wrapper="D2FrcnnTracingDetector",
176
- ),
177
- "cell/d2_model_1849999_cell_inf_only.pt": ModelProfile(
178
- name="cell/d2_model_1849999_cell_inf_only.pt",
179
- description="Detectron2 cell detection inference only model trained on Pubtabnet",
180
- config="dd/d2/cell/CASCADE_RCNN_R_50_FPN_GN.yaml",
181
- size=[274583063],
182
- tp_model=False,
183
- hf_repo_id="deepdoctection/d2_casc_rcnn_X_32xd4_50_FPN_GN_2FC_pubtabnet_c_inference_only",
184
- hf_model_name="d2_model_1849999_cell_inf_only.pt",
185
- hf_config_file=["Base-RCNN-FPN.yaml", "CASCADE_RCNN_R_50_FPN_GN.yaml"],
186
- categories={1: LayoutType.CELL},
187
- dl_library="PT",
188
- model_wrapper="D2FrcnnDetector",
189
- ),
190
- "cell/d2_model_1849999_cell_inf_only.ts": ModelProfile(
191
- name="cell/d2_model_1849999_cell_inf_only.ts",
192
- description="Detectron2 cell detection inference only model trained on Pubtabnet. Torchscript export",
193
- config="dd/d2/cell/CASCADE_RCNN_R_50_FPN_GN_TS.yaml",
194
- size=[274898682],
195
- tp_model=False,
196
- hf_repo_id="deepdoctection/d2_casc_rcnn_X_32xd4_50_FPN_GN_2FC_pubtabnet_c_inference_only",
197
- hf_model_name="d2_model_1849999_cell_inf_only.ts",
198
- hf_config_file=["CASCADE_RCNN_R_50_FPN_GN_TS.yaml"],
199
- categories={1: LayoutType.CELL},
200
- dl_library="PT",
201
- model_wrapper="D2FrcnnTracingDetector",
202
- ),
203
- "item/d2_model_1639999_item_inf_only.pt": ModelProfile(
204
- name="item/d2_model_1639999_item_inf_only.pt",
205
- description="Detectron2 item detection model inference only trained on Pubtabnet",
206
- config="dd/d2/item/CASCADE_RCNN_R_50_FPN_GN.yaml",
207
- size=[274595351],
208
- tp_model=False,
209
- hf_repo_id="deepdoctection/d2_casc_rcnn_X_32xd4_50_FPN_GN_2FC_pubtabnet_rc_inference_only",
210
- hf_model_name="d2_model_1639999_item_inf_only.pt",
211
- hf_config_file=["Base-RCNN-FPN.yaml", "CASCADE_RCNN_R_50_FPN_GN.yaml"],
212
- categories={1: LayoutType.ROW, 2: LayoutType.COLUMN},
213
- dl_library="PT",
214
- model_wrapper="D2FrcnnDetector",
215
- ),
216
- "item/d2_model_1639999_item_inf_only.ts": ModelProfile(
217
- name="item/d2_model_1639999_item_inf_only.ts",
218
- description="Detectron2 cell detection inference only model trained on Pubtabnet. Torchscript export",
219
- config="dd/d2/item/CASCADE_RCNN_R_50_FPN_GN_TS.yaml",
220
- size=[274910970],
221
- tp_model=False,
222
- hf_repo_id="deepdoctection/d2_casc_rcnn_X_32xd4_50_FPN_GN_2FC_pubtabnet_rc_inference_only",
223
- hf_model_name="d2_model_1639999_item_inf_only.ts",
224
- hf_config_file=["CASCADE_RCNN_R_50_FPN_GN_TS.yaml"],
225
- categories={1: LayoutType.ROW, 2: LayoutType.COLUMN},
226
- dl_library="PT",
227
- model_wrapper="D2FrcnnTracingDetector",
228
- ),
229
- "nielsr/lilt-xlm-roberta-base/pytorch_model.bin": ModelProfile(
230
- name="nielsr/lilt-xlm-roberta-base/pytorch_model.bin",
231
- description="LiLT build with a RobertaXLM base model",
232
- config="nielsr/lilt-xlm-roberta-base/config.json",
233
- size=[1136743583],
234
- tp_model=False,
235
- hf_repo_id="nielsr/lilt-xlm-roberta-base",
236
- hf_model_name="pytorch_model.bin",
237
- hf_config_file=["config.json"],
238
- dl_library="PT",
239
- ),
240
- "SCUT-DLVCLab/lilt-infoxlm-base/pytorch_model.bin": ModelProfile(
241
- name="SCUT-DLVCLab/lilt-infoxlm-base/pytorch_model.bin",
242
- description="Language-Independent Layout Transformer - InfoXLM model by stitching a pre-trained InfoXLM"
243
- " and a pre-trained Language-Independent Layout Transformer (LiLT) together. It was introduced"
244
- " in the paper LiLT: A Simple yet Effective Language-Independent Layout Transformer for"
245
- " Structured Document Understanding by Wang et al. and first released in this repository.",
246
- config="SCUT-DLVCLab/lilt-infoxlm-base/config.json",
247
- size=[1136743583],
248
- tp_model=False,
249
- hf_repo_id="SCUT-DLVCLab/lilt-infoxlm-base",
250
- hf_model_name="pytorch_model.bin",
251
- hf_config_file=["config.json"],
252
- dl_library="PT",
253
- ),
254
- "SCUT-DLVCLab/lilt-roberta-en-base/pytorch_model.bin": ModelProfile(
255
- name="SCUT-DLVCLab/lilt-roberta-en-base/pytorch_model.bin",
256
- description="Language-Independent Layout Transformer - RoBERTa model by stitching a pre-trained RoBERTa"
257
- " (English) and a pre-trained Language-Independent Layout Transformer (LiLT) together. It was"
258
- " introduced in the paper LiLT: A Simple yet Effective Language-Independent Layout Transformer"
259
- " for Structured Document Understanding by Wang et al. and first released in this repository.",
260
- config="SCUT-DLVCLab/lilt-roberta-en-base/config.json",
261
- size=[523151519],
262
- tp_model=False,
263
- hf_repo_id="SCUT-DLVCLab/lilt-roberta-en-base",
264
- hf_model_name="pytorch_model.bin",
265
- hf_config_file=["config.json"],
266
- dl_library="PT",
267
- ),
268
- "microsoft/layoutlm-base-uncased/pytorch_model.bin": ModelProfile(
269
- name="microsoft/layoutlm-base-uncased/pytorch_model.bin",
270
- description="LayoutLM is a simple but effective pre-training method of text and layout for document image"
271
- " understanding and information extraction tasks, such as form understanding and receipt"
272
- " understanding. LayoutLM archived the SOTA results on multiple datasets. This model does not"
273
- "contain any head and has to be fine tuned on a downstream task. This is model has been trained "
274
- "on 11M documents for 2 epochs. Configuration: 12-layer, 768-hidden, 12-heads, 113M parameters",
275
- size=[453093832],
276
- tp_model=False,
277
- config="microsoft/layoutlm-base-uncased/config.json",
278
- hf_repo_id="microsoft/layoutlm-base-uncased",
279
- hf_model_name="pytorch_model.bin",
280
- hf_config_file=["config.json"],
281
- dl_library="PT",
282
- ),
283
- "microsoft/layoutlm-large-uncased/pytorch_model.bin": ModelProfile(
284
- name="microsoft/layoutlm-large-uncased/pytorch_model.bin",
285
- description="LayoutLM is a simple but effective pre-training method of text and layout for document image"
286
- " understanding and information extraction tasks, such as form understanding and receipt"
287
- " understanding. LayoutLM archived the SOTA results on multiple datasets. This model does not"
288
- "contain any head and has to be fine tuned on a downstream task. This is model has been trained"
289
- " on 11M documents for 2 epochs. Configuration: 24-layer, 1024-hidden, 16-heads, 343M parameters",
290
- size=[1361845448],
291
- tp_model=False,
292
- config="microsoft/layoutlm-large-uncased/config.json",
293
- hf_repo_id="microsoft/layoutlm-large-uncased",
294
- hf_model_name="pytorch_model.bin",
295
- hf_config_file=["config.json"],
296
- dl_library="PT",
297
- ),
298
- "microsoft/layoutlmv2-base-uncased/pytorch_model.bin": ModelProfile(
299
- name="microsoft/layoutlmv2-base-uncased/pytorch_model.bin",
300
- description="LayoutLMv2 is an improved version of LayoutLM with new pre-training tasks to model the"
301
- " interaction among text, layout, and image in a single multi-modal framework. It outperforms"
302
- " strong baselines and achieves new state-of-the-art results on a wide variety of downstream"
303
- " visually-rich document understanding tasks, including , including FUNSD (0.7895 → 0.8420),"
304
- " CORD (0.9493 → 0.9601), SROIE (0.9524 → 0.9781), Kleister-NDA (0.834 → 0.852), RVL-CDIP"
305
- " (0.9443 → 0.9564), and DocVQA (0.7295 → 0.8672). The license is cc-by-nc-sa-4.0",
306
- size=[802243295],
307
- tp_model=False,
308
- config="microsoft/layoutlmv2-base-uncased/config.json",
309
- hf_repo_id="microsoft/layoutlmv2-base-uncased",
310
- hf_model_name="pytorch_model.bin",
311
- hf_config_file=["config.json"],
312
- dl_library="PT",
313
- ),
314
- "microsoft/layoutxlm-base/pytorch_model.bin": ModelProfile(
315
- name="microsoft/layoutxlm-base/pytorch_model.bin",
316
- description="Multimodal pre-training with text, layout, and image has achieved SOTA performance for "
317
- "visually-rich document understanding tasks recently, which demonstrates the great potential"
318
- " for joint learning across different modalities. In this paper, we present LayoutXLM, a"
319
- " multimodal pre-trained model for multilingual document understanding, which aims to bridge"
320
- " the language barriers for visually-rich document understanding. To accurately evaluate"
321
- " LayoutXLM, we also introduce a multilingual form understanding benchmark dataset named XFUN,"
322
- " which includes form understanding samples in 7 languages (Chinese, Japanese, Spanish, French,"
323
- " Italian, German, Portuguese), and key-value pairs are manually labeled for each language."
324
- " Experiment results show that the LayoutXLM model has significantly outperformed the existing"
325
- " SOTA cross-lingual pre-trained models on the XFUN dataset. The license is cc-by-nc-sa-4.0",
326
- size=[1476537178],
327
- tp_model=False,
328
- config="microsoft/layoutxlm-base/config.json",
329
- hf_repo_id="microsoft/layoutxlm-base",
330
- hf_model_name="pytorch_model.bin",
331
- hf_config_file=["config.json"],
332
- dl_library="PT",
333
- ),
334
- "microsoft/layoutlmv3-base/pytorch_model.bin": ModelProfile(
335
- name="microsoft/layoutlmv3-base/pytorch_model.bin",
336
- description="LayoutLMv3 is a pre-trained multimodal Transformer for Document AI with unified text and"
337
- " image masking. The simple unified architecture and training objectives make LayoutLMv3 a"
338
- " general-purpose pre-trained model. For example, LayoutLMv3 can be fine-tuned for both"
339
- " text-centric tasks, including form understanding, receipt understanding, and document"
340
- " visual question answering, and image-centric tasks such as document image classification"
341
- " and document layout analysis. The license is cc-by-nc-sa-4.0",
342
- size=[501380823],
343
- tp_model=False,
344
- config="microsoft/layoutlmv3-base/config.json",
345
- hf_repo_id="microsoft/layoutlmv3-base",
346
- hf_model_name="pytorch_model.bin",
347
- hf_config_file=["config.json"],
348
- dl_library="PT",
349
- ),
350
- "microsoft/table-transformer-detection/pytorch_model.bin": ModelProfile(
351
- name="microsoft/table-transformer-detection/pytorch_model.bin",
352
- description="Table Transformer (DETR) model trained on PubTables1M. It was introduced in the paper "
353
- "PubTables-1M: Towards Comprehensive Table Extraction From Unstructured Documents by Smock et "
354
- "al. This model is devoted to table detection",
355
- size=[115393245],
356
- tp_model=False,
357
- config="microsoft/table-transformer-detection/config.json",
358
- preprocessor_config="microsoft/table-transformer-detection/preprocessor_config.json",
359
- hf_repo_id="microsoft/table-transformer-detection",
360
- hf_model_name="pytorch_model.bin",
361
- hf_config_file=["config.json", "preprocessor_config.json"],
362
- categories={1: LayoutType.TABLE, 2: LayoutType.TABLE_ROTATED},
363
- dl_library="PT",
364
- model_wrapper="HFDetrDerivedDetector",
365
- ),
366
- "microsoft/table-transformer-structure-recognition/pytorch_model.bin": ModelProfile(
367
- name="microsoft/table-transformer-structure-recognition/pytorch_model.bin",
368
- description="Table Transformer (DETR) model trained on PubTables1M. It was introduced in the paper "
369
- "PubTables-1M: Towards Comprehensive Table Extraction From Unstructured Documents by Smock et "
370
- "al. This model is devoted to table structure recognition and assumes to receive a cropped"
371
- "table as input. It will predict rows, column and spanning cells",
372
- size=[115509981],
373
- tp_model=False,
374
- config="microsoft/table-transformer-structure-recognition/config.json",
375
- preprocessor_config="microsoft/table-transformer-structure-recognition/preprocessor_config.json",
376
- hf_repo_id="microsoft/table-transformer-structure-recognition",
377
- hf_model_name="pytorch_model.bin",
378
- hf_config_file=["config.json", "preprocessor_config.json"],
379
- categories={
380
- 1: LayoutType.TABLE,
381
- 2: LayoutType.COLUMN,
382
- 3: LayoutType.ROW,
383
- 4: CellType.COLUMN_HEADER,
384
- 5: CellType.PROJECTED_ROW_HEADER,
385
- 6: CellType.SPANNING,
386
- },
387
- dl_library="PT",
388
- model_wrapper="HFDetrDerivedDetector",
389
- ),
390
- "doctr/db_resnet50/pt/db_resnet50-ac60cadc.pt": ModelProfile(
391
- name="doctr/db_resnet50/pt/db_resnet50-ac60cadc.pt",
392
- description="Doctr implementation of DBNet from “Real-time Scene Text Detection with Differentiable "
393
- "Binarization”. For more information please check "
394
- "https://mindee.github.io/doctr/using_doctr/using_models.html#. This is the Pytorch artefact.",
395
- size=[101971449],
396
- urls=["https://doctr-static.mindee.com/models?id=v0.3.1/db_resnet50-ac60cadc.pt&src=0"],
397
- categories={1: LayoutType.WORD},
398
- dl_library="PT",
399
- model_wrapper="DoctrTextlineDetector",
400
- architecture="db_resnet50",
401
- ),
402
- "doctr/db_resnet50/tf/db_resnet50-adcafc63.zip": ModelProfile(
403
- name="doctr/db_resnet50/tf/db_resnet50-adcafc63.zip",
404
- description="Doctr implementation of DBNet from “Real-time Scene Text Detection with Differentiable "
405
- "Binarization”. For more information please check "
406
- "https://mindee.github.io/doctr/using_doctr/using_models.html#. This is the Tensorflow artefact.",
407
- size=[94178964],
408
- urls=["https://doctr-static.mindee.com/models?id=v0.2.0/db_resnet50-adcafc63.zip&src=0"],
409
- categories={1: LayoutType.WORD},
410
- dl_library="TF",
411
- model_wrapper="DoctrTextlineDetector",
412
- architecture="db_resnet50",
413
- ),
414
- "doctr/crnn_vgg16_bn/pt/crnn_vgg16_bn-9762b0b0.pt": ModelProfile(
415
- name="doctr/crnn_vgg16_bn/pt/crnn_vgg16_bn-9762b0b0.pt",
416
- description="Doctr implementation of CRNN from “An End-to-End Trainable Neural Network for Image-based "
417
- "Sequence Recognition and Its Application to Scene Text Recognition”. For more information "
418
- "please check https://mindee.github.io/doctr/using_doctr/using_models.html#. This is the Pytorch "
419
- "artefact.",
420
- size=[63286381],
421
- urls=["https://doctr-static.mindee.com/models?id=v0.3.1/crnn_vgg16_bn-9762b0b0.pt&src=0"],
422
- dl_library="PT",
423
- model_wrapper="DoctrTextRecognizer",
424
- architecture="crnn_vgg16_bn",
425
- ),
426
- "doctr/crnn_vgg16_bn/tf/crnn_vgg16_bn-76b7f2c6.zip": ModelProfile(
427
- name="doctr/crnn_vgg16_bn/tf/crnn_vgg16_bn-76b7f2c6.zip",
428
- description="Doctr implementation of CRNN from “An End-to-End Trainable Neural Network for Image-based "
429
- "Sequence Recognition and Its Application to Scene Text Recognition”. For more information "
430
- "please check https://mindee.github.io/doctr/using_doctr/using_models.html#. This is the Tensorflow "
431
- "artefact.",
432
- size=[58758994],
433
- urls=["https://doctr-static.mindee.com/models?id=v0.3.0/crnn_vgg16_bn-76b7f2c6.zip&src=0"],
434
- dl_library="TF",
435
- model_wrapper="DoctrTextRecognizer",
436
- architecture="crnn_vgg16_bn",
437
- ),
438
- "FacebookAI/xlm-roberta-base": ModelProfile(
439
- name="FacebookAI/xlm-roberta-base/pytorch_model.bin",
440
- description="XLM-RoBERTa model pre-trained on 2.5TB of filtered CommonCrawl data containing 100 languages."
441
- " It was introduced in the paper Unsupervised Cross-lingual Representation Learning at Scale"
442
- " by Conneau et al. and first released in this repository.",
443
- size=[1115590446],
444
- tp_model=False,
445
- config="FacebookAI/xlm-roberta-base/config.json",
446
- hf_repo_id="FacebookAI/xlm-roberta-base",
447
- hf_model_name="pytorch_model.bin",
448
- hf_config_file=["config.json"],
449
- dl_library="PT",
450
- ),
451
- "fasttext/lid.176.bin": ModelProfile(
452
- name="fasttext/lid.176.bin",
453
- description="Fasttext language detection model",
454
- size=[131266198],
455
- urls=["https://dl.fbaipublicfiles.com/fasttext/supervised-models/lid.176.bin"],
456
- categories={
457
- 1: Languages.ENGLISH,
458
- 2: Languages.RUSSIAN,
459
- 3: Languages.GERMAN,
460
- 4: Languages.FRENCH,
461
- 5: Languages.ITALIAN,
462
- 6: Languages.JAPANESE,
463
- 7: Languages.SPANISH,
464
- 8: Languages.CEBUANO,
465
- 9: Languages.TURKISH,
466
- 10: Languages.PORTUGUESE,
467
- 11: Languages.UKRAINIAN,
468
- 12: Languages.ESPERANTO,
469
- 13: Languages.POLISH,
470
- 14: Languages.SWEDISH,
471
- 15: Languages.DUTCH,
472
- 16: Languages.HEBREW,
473
- 17: Languages.CHINESE,
474
- 18: Languages.HUNGARIAN,
475
- 19: Languages.ARABIC,
476
- 20: Languages.CATALAN,
477
- 21: Languages.FINNISH,
478
- 22: Languages.CZECH,
479
- 23: Languages.PERSIAN,
480
- 24: Languages.SERBIAN,
481
- 25: Languages.GREEK,
482
- 26: Languages.VIETNAMESE,
483
- 27: Languages.BULGARIAN,
484
- 28: Languages.KOREAN,
485
- 29: Languages.NORWEGIAN,
486
- 30: Languages.MACEDONIAN,
487
- 31: Languages.ROMANIAN,
488
- 32: Languages.INDONESIAN,
489
- 33: Languages.THAI,
490
- 34: Languages.ARMENIAN,
491
- 35: Languages.DANISH,
492
- 36: Languages.TAMIL,
493
- 37: Languages.HINDI,
494
- 38: Languages.CROATIAN,
495
- 39: Languages.NOT_DEFINED,
496
- 40: Languages.BELARUSIAN,
497
- 41: Languages.GEORGIAN,
498
- 42: Languages.TELUGU,
499
- 43: Languages.KAZAKH,
500
- 44: Languages.WARAY,
501
- 45: Languages.LITHUANIAN,
502
- 46: Languages.SCOTTISH,
503
- 47: Languages.SLOVAK,
504
- 48: Languages.BENIN,
505
- 49: Languages.BASQUE,
506
- 50: Languages.SLOVENIAN,
507
- 51: Languages.NOT_DEFINED,
508
- 52: Languages.MALAYALAM,
509
- 53: Languages.MARATHI,
510
- 54: Languages.ESTONIAN,
511
- 55: Languages.AZERBAIJANI,
512
- 56: Languages.NOT_DEFINED,
513
- 57: Languages.ALBANIAN,
514
- 58: Languages.LATIN,
515
- 59: Languages.BOSNIAN,
516
- 60: Languages.NORWEGIAN_NOVOSIBIRSK,
517
- 61: Languages.URDU,
518
- 62: Languages.NOT_DEFINED,
519
- 63: Languages.NOT_DEFINED,
520
- 64: Languages.NOT_DEFINED,
521
- 65: Languages.NOT_DEFINED,
522
- 66: Languages.NOT_DEFINED,
523
- 67: Languages.NOT_DEFINED,
524
- 68: Languages.NOT_DEFINED,
525
- 69: Languages.NOT_DEFINED,
526
- 70: Languages.NOT_DEFINED,
527
- 71: Languages.NOT_DEFINED,
528
- 72: Languages.NOT_DEFINED,
529
- 73: Languages.NOT_DEFINED,
530
- 74: Languages.NOT_DEFINED,
531
- 75: Languages.NOT_DEFINED,
532
- 76: Languages.NOT_DEFINED,
533
- 77: Languages.NOT_DEFINED,
534
- 78: Languages.NOT_DEFINED,
535
- 79: Languages.NOT_DEFINED,
536
- 80: Languages.NOT_DEFINED,
537
- 81: Languages.NOT_DEFINED,
538
- 82: Languages.NOT_DEFINED,
539
- 83: Languages.NOT_DEFINED,
540
- 84: Languages.NOT_DEFINED,
541
- 85: Languages.NOT_DEFINED,
542
- 86: Languages.NOT_DEFINED,
543
- 87: Languages.NOT_DEFINED,
544
- 88: Languages.NOT_DEFINED,
545
- 89: Languages.NOT_DEFINED,
546
- 90: Languages.NOT_DEFINED,
547
- 91: Languages.NOT_DEFINED,
548
- 92: Languages.NOT_DEFINED,
549
- 93: Languages.NOT_DEFINED,
550
- 94: Languages.NOT_DEFINED,
551
- 95: Languages.NOT_DEFINED,
552
- 96: Languages.NOT_DEFINED,
553
- 97: Languages.NOT_DEFINED,
554
- 98: Languages.NOT_DEFINED,
555
- 99: Languages.NOT_DEFINED,
556
- 100: Languages.NOT_DEFINED,
557
- 101: Languages.NOT_DEFINED,
558
- 102: Languages.NOT_DEFINED,
559
- 103: Languages.NOT_DEFINED,
560
- 104: Languages.NOT_DEFINED,
561
- 105: Languages.NOT_DEFINED,
562
- 106: Languages.NOT_DEFINED,
563
- 107: Languages.NOT_DEFINED,
564
- 108: Languages.NOT_DEFINED,
565
- 109: Languages.NOT_DEFINED,
566
- 110: Languages.NOT_DEFINED,
567
- 111: Languages.NOT_DEFINED,
568
- 112: Languages.NOT_DEFINED,
569
- 113: Languages.NOT_DEFINED,
570
- 114: Languages.NOT_DEFINED,
571
- 115: Languages.NOT_DEFINED,
572
- 116: Languages.NOT_DEFINED,
573
- 117: Languages.NOT_DEFINED,
574
- 118: Languages.NOT_DEFINED,
575
- 119: Languages.NOT_DEFINED,
576
- 120: Languages.NOT_DEFINED,
577
- 121: Languages.NOT_DEFINED,
578
- 122: Languages.NOT_DEFINED,
579
- 123: Languages.NOT_DEFINED,
580
- 124: Languages.NOT_DEFINED,
581
- 125: Languages.NOT_DEFINED,
582
- 126: Languages.NOT_DEFINED,
583
- 127: Languages.NOT_DEFINED,
584
- 128: Languages.NOT_DEFINED,
585
- 129: Languages.NOT_DEFINED,
586
- 130: Languages.NOT_DEFINED,
587
- 131: Languages.NOT_DEFINED,
588
- 132: Languages.NOT_DEFINED,
589
- 133: Languages.NOT_DEFINED,
590
- 134: Languages.NOT_DEFINED,
591
- 135: Languages.NOT_DEFINED,
592
- 136: Languages.NOT_DEFINED,
593
- 137: Languages.NOT_DEFINED,
594
- 138: Languages.NOT_DEFINED,
595
- 139: Languages.NOT_DEFINED,
596
- 140: Languages.NOT_DEFINED,
597
- 141: Languages.NOT_DEFINED,
598
- 142: Languages.NOT_DEFINED,
599
- 143: Languages.NOT_DEFINED,
600
- 144: Languages.NOT_DEFINED,
601
- 145: Languages.NOT_DEFINED,
602
- 146: Languages.NOT_DEFINED,
603
- 147: Languages.NOT_DEFINED,
604
- 148: Languages.NOT_DEFINED,
605
- 149: Languages.NOT_DEFINED,
606
- 150: Languages.NOT_DEFINED,
607
- 151: Languages.NOT_DEFINED,
608
- 152: Languages.NOT_DEFINED,
609
- 153: Languages.NOT_DEFINED,
610
- 154: Languages.NOT_DEFINED,
611
- 155: Languages.NOT_DEFINED,
612
- 156: Languages.NOT_DEFINED,
613
- 157: Languages.NOT_DEFINED,
614
- 158: Languages.NOT_DEFINED,
615
- 159: Languages.NOT_DEFINED,
616
- 160: Languages.NOT_DEFINED,
617
- 161: Languages.NOT_DEFINED,
618
- 162: Languages.NOT_DEFINED,
619
- 163: Languages.NOT_DEFINED,
620
- 164: Languages.NOT_DEFINED,
621
- 165: Languages.NOT_DEFINED,
622
- 166: Languages.NOT_DEFINED,
623
- 167: Languages.NOT_DEFINED,
624
- 168: Languages.NOT_DEFINED,
625
- 169: Languages.NOT_DEFINED,
626
- 170: Languages.NOT_DEFINED,
627
- 171: Languages.NOT_DEFINED,
628
- 172: Languages.NOT_DEFINED,
629
- 173: Languages.NOT_DEFINED,
630
- 174: Languages.NOT_DEFINED,
631
- 175: Languages.NOT_DEFINED,
632
- 176: Languages.NOT_DEFINED,
633
- },
634
- categories_orig={
635
- "__label__en": Languages.ENGLISH,
636
- "__label__ru": Languages.RUSSIAN,
637
- "__label__de": Languages.GERMAN,
638
- "__label__fr": Languages.FRENCH,
639
- "__label__it": Languages.ITALIAN,
640
- "__label__ja": Languages.JAPANESE,
641
- "__label__es": Languages.SPANISH,
642
- "__label__ceb": Languages.CEBUANO,
643
- "__label__tr": Languages.TURKISH,
644
- "__label__pt": Languages.PORTUGUESE,
645
- "__label__uk": Languages.UKRAINIAN,
646
- "__label__eo": Languages.ESPERANTO,
647
- "__label__pl": Languages.POLISH,
648
- "__label__sv": Languages.SWEDISH,
649
- "__label__nl": Languages.DUTCH,
650
- "__label__he": Languages.HEBREW,
651
- "__label__zh": Languages.CHINESE,
652
- "__label__hu": Languages.HUNGARIAN,
653
- "__label__ar": Languages.ARABIC,
654
- "__label__ca": Languages.CATALAN,
655
- "__label__fi": Languages.FINNISH,
656
- "__label__cs": Languages.CZECH,
657
- "__label__fa": Languages.PERSIAN,
658
- "__label__sr": Languages.SERBIAN,
659
- "__label__el": Languages.GREEK,
660
- "__label__vi": Languages.VIETNAMESE,
661
- "__label__bg": Languages.BULGARIAN,
662
- "__label__ko": Languages.KOREAN,
663
- "__label__no": Languages.NORWEGIAN,
664
- "__label__mk": Languages.MACEDONIAN,
665
- "__label__ro": Languages.ROMANIAN,
666
- "__label__id": Languages.INDONESIAN,
667
- "__label__th": Languages.THAI,
668
- "__label__hy": Languages.ARMENIAN,
669
- "__label__da": Languages.DANISH,
670
- "__label__ta": Languages.TAMIL,
671
- "__label__hi": Languages.HINDI,
672
- "__label__hr": Languages.CROATIAN,
673
- "__label__sh": Languages.NOT_DEFINED,
674
- "__label__be": Languages.BELARUSIAN,
675
- "__label__ka": Languages.GEORGIAN,
676
- "__label__te": Languages.TELUGU,
677
- "__label__kk": Languages.KAZAKH,
678
- "__label__war": Languages.WARAY,
679
- "__label__lt": Languages.LITHUANIAN,
680
- "__label__gl": Languages.SCOTTISH,
681
- "__label__sk": Languages.SLOVAK,
682
- "__label__bn": Languages.BENIN,
683
- "__label__eu": Languages.BASQUE,
684
- "__label__sl": Languages.SLOVENIAN,
685
- "__label__kn": Languages.NOT_DEFINED,
686
- "__label__ml": Languages.MALAYALAM,
687
- "__label__mr": Languages.MARATHI,
688
- "__label__et": Languages.ESTONIAN,
689
- "__label__az": Languages.AZERBAIJANI,
690
- "__label__ms": Languages.NOT_DEFINED,
691
- "__label__sq": Languages.ALBANIAN,
692
- "__label__la": Languages.LATIN,
693
- "__label__bs": Languages.BOSNIAN,
694
- "__label__nn": Languages.NORWEGIAN_NOVOSIBIRSK,
695
- "__label__ur": Languages.URDU,
696
- "__label__lv": Languages.NOT_DEFINED,
697
- "__label__my": Languages.NOT_DEFINED,
698
- "__label__tt": Languages.NOT_DEFINED,
699
- "__label__af": Languages.NOT_DEFINED,
700
- "__label__oc": Languages.NOT_DEFINED,
701
- "__label__nds": Languages.NOT_DEFINED,
702
- "__label__ky": Languages.NOT_DEFINED,
703
- "__label__ast": Languages.NOT_DEFINED,
704
- "__label__tl": Languages.NOT_DEFINED,
705
- "__label__is": Languages.NOT_DEFINED,
706
- "__label__ia": Languages.NOT_DEFINED,
707
- "__label__si": Languages.NOT_DEFINED,
708
- "__label__gu": Languages.NOT_DEFINED,
709
- "__label__km": Languages.NOT_DEFINED,
710
- "__label__br": Languages.NOT_DEFINED,
711
- "__label__ba": Languages.NOT_DEFINED,
712
- "__label__uz": Languages.NOT_DEFINED,
713
- "__label__bo": Languages.NOT_DEFINED,
714
- "__label__pa": Languages.NOT_DEFINED,
715
- "__label__vo": Languages.NOT_DEFINED,
716
- "__label__als": Languages.NOT_DEFINED,
717
- "__label__ne": Languages.NOT_DEFINED,
718
- "__label__cy": Languages.NOT_DEFINED,
719
- "__label__jbo": Languages.NOT_DEFINED,
720
- "__label__fy": Languages.NOT_DEFINED,
721
- "__label__mn": Languages.NOT_DEFINED,
722
- "__label__lb": Languages.NOT_DEFINED,
723
- "__label__ce": Languages.NOT_DEFINED,
724
- "__label__ug": Languages.NOT_DEFINED,
725
- "__label__tg": Languages.NOT_DEFINED,
726
- "__label__sco": Languages.NOT_DEFINED,
727
- "__label__sa": Languages.NOT_DEFINED,
728
- "__label__cv": Languages.NOT_DEFINED,
729
- "__label__jv": Languages.NOT_DEFINED,
730
- "__label__min": Languages.NOT_DEFINED,
731
- "__label__io": Languages.NOT_DEFINED,
732
- "__label__or": Languages.NOT_DEFINED,
733
- "__label__as": Languages.NOT_DEFINED,
734
- "__label__new": Languages.NOT_DEFINED,
735
- "__label__ga": Languages.NOT_DEFINED,
736
- "__label__mg": Languages.NOT_DEFINED,
737
- "__label__an": Languages.NOT_DEFINED,
738
- "__label__ckb": Languages.NOT_DEFINED,
739
- "__label__sw": Languages.NOT_DEFINED,
740
- "__label__bar": Languages.NOT_DEFINED,
741
- "__label__lmo": Languages.NOT_DEFINED,
742
- "__label__yi": Languages.NOT_DEFINED,
743
- "__label__arz": Languages.NOT_DEFINED,
744
- "__label__mhr": Languages.NOT_DEFINED,
745
- "__label__azb": Languages.NOT_DEFINED,
746
- "__label__sah": Languages.NOT_DEFINED,
747
- "__label__pnb": Languages.NOT_DEFINED,
748
- "__label__su": Languages.NOT_DEFINED,
749
- "__label__bpy": Languages.NOT_DEFINED,
750
- "__label__pms": Languages.NOT_DEFINED,
751
- "__label__ilo": Languages.NOT_DEFINED,
752
- "__label__wuu": Languages.NOT_DEFINED,
753
- "__label__ku": Languages.NOT_DEFINED,
754
- "__label__ps": Languages.NOT_DEFINED,
755
- "__label__ie": Languages.NOT_DEFINED,
756
- "__label__xmf": Languages.NOT_DEFINED,
757
- "__label__yue": Languages.NOT_DEFINED,
758
- "__label__gom": Languages.NOT_DEFINED,
759
- "__label__li": Languages.NOT_DEFINED,
760
- "__label__mwl": Languages.NOT_DEFINED,
761
- "__label__kw": Languages.NOT_DEFINED,
762
- "__label__sd": Languages.NOT_DEFINED,
763
- "__label__hsb": Languages.NOT_DEFINED,
764
- "__label__scn": Languages.NOT_DEFINED,
765
- "__label__gd": Languages.NOT_DEFINED,
766
- "__label__pam": Languages.NOT_DEFINED,
767
- "__label__bh": Languages.NOT_DEFINED,
768
- "__label__mai": Languages.NOT_DEFINED,
769
- "__label__vec": Languages.NOT_DEFINED,
770
- "__label__mt": Languages.NOT_DEFINED,
771
- "__label__dv": Languages.NOT_DEFINED,
772
- "__label__wa": Languages.NOT_DEFINED,
773
- "__label__mzn": Languages.NOT_DEFINED,
774
- "__label__am": Languages.NOT_DEFINED,
775
- "__label__qu": Languages.NOT_DEFINED,
776
- "__label__eml": Languages.NOT_DEFINED,
777
- "__label__cbk": Languages.NOT_DEFINED,
778
- "__label__tk": Languages.NOT_DEFINED,
779
- "__label__rm": Languages.NOT_DEFINED,
780
- "__label__os": Languages.NOT_DEFINED,
781
- "__label__vls": Languages.NOT_DEFINED,
782
- "__label__yo": Languages.NOT_DEFINED,
783
- "__label__lo": Languages.NOT_DEFINED,
784
- "__label__lez": Languages.NOT_DEFINED,
785
- "__label__so": Languages.NOT_DEFINED,
786
- "__label__myv": Languages.NOT_DEFINED,
787
- "__label__diq": Languages.NOT_DEFINED,
788
- "__label__mrj": Languages.NOT_DEFINED,
789
- "__label__dsb": Languages.NOT_DEFINED,
790
- "__label__frr": Languages.NOT_DEFINED,
791
- "__label__ht": Languages.NOT_DEFINED,
792
- "__label__gn": Languages.NOT_DEFINED,
793
- "__label__bxr": Languages.NOT_DEFINED,
794
- "__label__kv": Languages.NOT_DEFINED,
795
- "__label__sc": Languages.NOT_DEFINED,
796
- "__label__nah": Languages.NOT_DEFINED,
797
- "__label__krc": Languages.NOT_DEFINED,
798
- "__label__bcl": Languages.NOT_DEFINED,
799
- "__label__nap": Languages.NOT_DEFINED,
800
- "__label__gv": Languages.NOT_DEFINED,
801
- "__label__av": Languages.NOT_DEFINED,
802
- "__label__rue": Languages.NOT_DEFINED,
803
- "__label__xal": Languages.NOT_DEFINED,
804
- "__label__pfl": Languages.NOT_DEFINED,
805
- "__label__dty": Languages.NOT_DEFINED,
806
- "__label__hif": Languages.NOT_DEFINED,
807
- "__label__co": Languages.NOT_DEFINED,
808
- "__label__lrc": Languages.NOT_DEFINED,
809
- "__label__vep": Languages.NOT_DEFINED,
810
- "__label__tyv": Languages.NOT_DEFINED,
811
- },
812
- model_wrapper="FasttextLangDetector",
813
- ),
814
- }
118
+ CATALOG: dict[str, ModelProfile] = {}
815
119
 
816
120
  @staticmethod
817
121
  def get_full_path_weights(name: PathLikeOrStr) -> PathLikeOrStr:
818
122
  """
819
123
  Returns the absolute path of weights.
820
124
 
821
- Note, that weights are sometimes not defined by only one artefact. The returned string will only represent one
822
- weights artefact.
125
+ Note:
126
+ Weights are sometimes not defined by only one artifact. The returned string will only represent one
127
+ weights artifact.
128
+
129
+ Args:
130
+ name: model name
823
131
 
824
- :param name: model name
825
- :return: absolute weight path
132
+ Returns:
133
+ absolute weight path
826
134
  """
827
135
  try:
828
136
  profile = ModelCatalog.get_profile(os.fspath(name))
@@ -847,14 +155,18 @@ class ModelCatalog:
847
155
  @staticmethod
848
156
  def get_full_path_configs(name: PathLikeOrStr) -> PathLikeOrStr:
849
157
  """
850
- Return the absolute path of configs for some given weights. Alternatively, pass last a path to a config file
158
+ Absolute path of configs for some given weights. Alternatively, pass a path to a config file
851
159
  (without the base path to the cache config directory).
852
160
 
853
- Note, that configs are sometimes not defined by only one file. The returned string will only represent one
854
- file.
161
+ Note:
162
+ Configs are sometimes not defined by only one file. The returned string will only represent one
163
+ file.
855
164
 
856
- :param name: model name
857
- :return: absolute path to the config
165
+ Args:
166
+ name: model name
167
+
168
+ Returns:
169
+ Absolute path to the config
858
170
  """
859
171
  try:
860
172
  profile = ModelCatalog.get_profile(os.fspath(name))
@@ -876,8 +188,11 @@ class ModelCatalog:
876
188
  Return the absolute path of preprocessor configs for some given weights. Preprocessors are occasionally provided
877
189
  by the transformer library.
878
190
 
879
- :param name: model name
880
- :return: absolute path to the preprocessor config
191
+ Args:
192
+ name: model name
193
+
194
+ Returns:
195
+ Absolute path to the preprocessor config
881
196
  """
882
197
 
883
198
  try:
@@ -897,14 +212,16 @@ class ModelCatalog:
897
212
  @staticmethod
898
213
  def get_model_list() -> list[PathLikeOrStr]:
899
214
  """
900
- Returns a list of absolute paths of registered models.
215
+ Returns:
216
+ A list of absolute paths of registered models.
901
217
  """
902
218
  return [os.path.join(get_weights_dir_path(), profile.name) for profile in ModelCatalog.CATALOG.values()]
903
219
 
904
220
  @staticmethod
905
221
  def get_profile_list() -> list[str]:
906
222
  """
907
- Returns a list profile keys.
223
+ Returns:
224
+ A list of profile keys.
908
225
  """
909
226
  return list(ModelCatalog.CATALOG.keys())
910
227
 
@@ -913,8 +230,11 @@ class ModelCatalog:
913
230
  """
914
231
  Checks if some weights belong to a registered model
915
232
 
916
- :param path_weights: relative or absolute path
917
- :return: True if the weights are registered in `ModelCatalog`
233
+ Args:
234
+ path_weights: relative or absolute path
235
+
236
+ Returns:
237
+ `True` if the weights are registered in `ModelCatalog`
918
238
  """
919
239
  if (ModelCatalog.get_full_path_weights(path_weights) in ModelCatalog.get_model_list()) or (
920
240
  path_weights in ModelCatalog.get_model_list()
@@ -927,8 +247,11 @@ class ModelCatalog:
927
247
  """
928
248
  Returns the profile of given model name, i.e. the config file, size and urls.
929
249
 
930
- :param name: model name
931
- :return: A dict of model/weights profiles
250
+ Args:
251
+ name: model name
252
+
253
+ Returns:
254
+ A dict of model/weights profiles
932
255
  """
933
256
 
934
257
  profile = ModelCatalog.CATALOG.get(name)
@@ -941,9 +264,10 @@ class ModelCatalog:
941
264
  """
942
265
  Register a model with its profile
943
266
 
944
- :param name: Name of the model. We use the file name of the model along with its path (starting from the
945
- weights .cache dir. e.g. 'my_model/model_123.pkl'.
946
- :param profile: profile of the model
267
+ Args:
268
+ name: Name of the model. We use the file name of the model along with its path (starting from the
269
+ weights `.cache` dir), e.g. `my_model/model_123.pkl`.
270
+ profile: profile of the model
947
271
  """
948
272
  if name in ModelCatalog.CATALOG:
949
273
  raise KeyError("Model already registered")
@@ -952,9 +276,10 @@ class ModelCatalog:
952
276
  @staticmethod
953
277
  def load_profiles_from_file(path: Optional[PathLikeOrStr] = None) -> None:
954
278
  """
955
- Load model profiles from a jsonl file and extend `CATALOG` with the new profiles.
279
+ Load model profiles from a `jsonl` file and extend `CATALOG` with the new profiles.
956
280
 
957
- :param path: Path to the file. `None` is allowed but it will do nothing.
281
+ Args:
282
+ path: Path to the file. `None` is allowed but will do nothing.
958
283
  """
959
284
  if not path:
960
285
  return
@@ -968,9 +293,10 @@ class ModelCatalog:
968
293
  @staticmethod
969
294
  def save_profiles_to_file(target_path: PathLikeOrStr) -> None:
970
295
  """
971
- Save model profiles to a jsonl file.
296
+ Save model profiles to a `jsonl` file.
972
297
 
973
- :param target_path: Path to the file.
298
+ Args:
299
+ target_path: Path to the file.
974
300
  """
975
301
  with jsonlines.open(target_path, mode="w") as writer:
976
302
  for profile in ModelCatalog.CATALOG.values():
@@ -978,6 +304,11 @@ class ModelCatalog:
978
304
  writer.close()
979
305
 
980
306
 
307
+ # Loading default profiles
308
+ dd_profile_path = maybe_copy_config_to_cache(
309
+ get_package_path(), get_cache_dir_path(), "deepdoctection/configs/profiles.jsonl", False
310
+ )
311
+ ModelCatalog.load_profiles_from_file(dd_profile_path)
981
312
  # Additional profiles can be added
982
313
  ModelCatalog.load_profiles_from_file(os.environ.get("MODEL_CATALOG", None))
983
314
 
@@ -986,8 +317,11 @@ def get_tp_weight_names(name: str) -> list[str]:
986
317
  """
987
318
  Given a path to some model weights it will return all file names according to TP naming convention
988
319
 
989
- :param name: TP model name
990
- :return: A list of TP file names
320
+ Args:
321
+ name: TP model name
322
+
323
+ Returns:
324
+ A list of TP file names
991
325
  """
992
326
  _, file_name = os.path.split(name)
993
327
  prefix, _ = file_name.split(".")
@@ -1002,6 +336,11 @@ def print_model_infos(add_description: bool = True, add_config: bool = True, add
1002
336
  """
1003
337
  Prints a table with all registered model profiles and some of their attributes (name, description, config and
1004
338
  categories)
339
+
340
+ Args:
341
+ add_description: If `True`, the description of the model will be printed
342
+ add_config: If `True`, the config of the model will be printed
343
+ add_categories: If `True`, the categories of the model will be printed
1005
344
  """
1006
345
 
1007
346
  profiles = ModelCatalog.CATALOG.values()
@@ -1038,8 +377,11 @@ class ModelDownloadManager:
1038
377
  Class for organizing downloads of config files and weights from various sources. Internally, it will use model
1039
378
  profiles to know where things are stored.
1040
379
 
380
+ Example:
381
+ ```python
1041
382
  # if you are not sure about the model name use the ModelCatalog
1042
383
  ModelDownloadManager.maybe_download_weights_and_configs("layout/model-800000_inf_only.data-00000-of-00001")
384
+ ```
1043
385
  """
1044
386
 
1045
387
  @staticmethod
@@ -1048,8 +390,10 @@ class ModelDownloadManager:
1048
390
  Check if some model is registered. If yes, it will check if its weights
1049
391
  must be downloaded. Only weights that do not have the expected size will be downloaded again.
1050
392
 
1051
- :param name: A path to some model weights
1052
- :return: Absolute path to model weights if model is registered
393
+ Args:
394
+ name: A path to some model weights
395
+ Returns:
396
+ Absolute path to model weights, if model is registered
1053
397
  """
1054
398
 
1055
399
  absolute_path_weights = ModelCatalog.get_full_path_weights(name)
@@ -1091,10 +435,11 @@ class ModelDownloadManager:
1091
435
  Load a model from the Huggingface hub for a given profile and saves the model at the directory of the given
1092
436
  path.
1093
437
 
1094
- :param profile: Profile according to `ModelCatalog.get_profile(path_weights)`
1095
- :param absolute_path: Absolute path (incl. file name) of target file
1096
- :param file_names: Optionally, replace the file name of the ModelCatalog. This is necessary e.g. for Tensorpack
1097
- models
438
+ Args:
439
+ profile: Profile according to `ModelCatalog.get_profile(path_weights)`
440
+ absolute_path: Absolute path (incl. file name) of target file
441
+ file_names: Optionally, replace the file name of the `ModelCatalog`. This is necessary e.g. for Tensorpack
442
+ models
1098
443
  """
1099
444
  repo_id = profile.hf_repo_id
1100
445
  if repo_id is None:
@@ -1125,15 +470,16 @@ class ModelDownloadManager:
1125
470
  Load config file(s) from the Huggingface hub for a given profile and save them at the directory of the
1126
471
  given path.
1127
472
 
1128
- :param profile: Profile according to `ModelCatalog.get_profile(path_weights)`
1129
- :param absolute_path: Absolute path (incl. file name) of target file
473
+ Args:
474
+ profile: Profile according to `ModelCatalog.get_profile(path_weights)`
475
+ absolute_path: Absolute path (incl. file name) of target file
1130
476
  """
1131
477
 
1132
478
  repo_id = profile.hf_repo_id
1133
479
  if repo_id is None:
1134
480
  raise ValueError("hf_repo_id cannot be None")
1135
481
  directory, _ = os.path.split(absolute_path)
1136
- if not profile.hf_config_file:
482
+ if profile.hf_config_file is None:
1137
483
  raise ValueError("hf_config_file cannot be None")
1138
484
  for file_name in profile.hf_config_file:
1139
485
  ModelDownloadManager._load_from_hf_hub(repo_id, file_name, directory)