deepdoctection 0.31__py3-none-any.whl → 0.32__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of deepdoctection might be problematic; consult the package registry's advisory page for more details.

Files changed (91)
  1. deepdoctection/__init__.py +35 -28
  2. deepdoctection/analyzer/dd.py +30 -24
  3. deepdoctection/configs/conf_dd_one.yaml +34 -31
  4. deepdoctection/datapoint/annotation.py +2 -1
  5. deepdoctection/datapoint/box.py +2 -1
  6. deepdoctection/datapoint/image.py +13 -7
  7. deepdoctection/datapoint/view.py +95 -24
  8. deepdoctection/datasets/__init__.py +1 -4
  9. deepdoctection/datasets/adapter.py +5 -2
  10. deepdoctection/datasets/base.py +5 -3
  11. deepdoctection/datasets/info.py +2 -2
  12. deepdoctection/datasets/instances/doclaynet.py +3 -2
  13. deepdoctection/datasets/instances/fintabnet.py +2 -1
  14. deepdoctection/datasets/instances/funsd.py +2 -1
  15. deepdoctection/datasets/instances/iiitar13k.py +5 -2
  16. deepdoctection/datasets/instances/layouttest.py +2 -1
  17. deepdoctection/datasets/instances/publaynet.py +2 -2
  18. deepdoctection/datasets/instances/pubtables1m.py +6 -3
  19. deepdoctection/datasets/instances/pubtabnet.py +2 -1
  20. deepdoctection/datasets/instances/rvlcdip.py +2 -1
  21. deepdoctection/datasets/instances/xfund.py +2 -1
  22. deepdoctection/eval/__init__.py +1 -4
  23. deepdoctection/eval/cocometric.py +2 -1
  24. deepdoctection/eval/eval.py +17 -13
  25. deepdoctection/eval/tedsmetric.py +14 -11
  26. deepdoctection/eval/tp_eval_callback.py +9 -3
  27. deepdoctection/extern/__init__.py +2 -7
  28. deepdoctection/extern/d2detect.py +24 -32
  29. deepdoctection/extern/deskew.py +4 -2
  30. deepdoctection/extern/doctrocr.py +75 -81
  31. deepdoctection/extern/fastlang.py +4 -2
  32. deepdoctection/extern/hfdetr.py +22 -28
  33. deepdoctection/extern/hflayoutlm.py +335 -103
  34. deepdoctection/extern/hflm.py +225 -0
  35. deepdoctection/extern/model.py +56 -47
  36. deepdoctection/extern/pdftext.py +8 -4
  37. deepdoctection/extern/pt/__init__.py +1 -3
  38. deepdoctection/extern/pt/nms.py +6 -2
  39. deepdoctection/extern/pt/ptutils.py +27 -19
  40. deepdoctection/extern/texocr.py +4 -2
  41. deepdoctection/extern/tp/tfutils.py +43 -9
  42. deepdoctection/extern/tp/tpcompat.py +10 -7
  43. deepdoctection/extern/tp/tpfrcnn/__init__.py +20 -0
  44. deepdoctection/extern/tp/tpfrcnn/common.py +7 -3
  45. deepdoctection/extern/tp/tpfrcnn/config/__init__.py +20 -0
  46. deepdoctection/extern/tp/tpfrcnn/config/config.py +9 -6
  47. deepdoctection/extern/tp/tpfrcnn/modeling/__init__.py +20 -0
  48. deepdoctection/extern/tp/tpfrcnn/modeling/backbone.py +17 -7
  49. deepdoctection/extern/tp/tpfrcnn/modeling/generalized_rcnn.py +12 -6
  50. deepdoctection/extern/tp/tpfrcnn/modeling/model_box.py +9 -4
  51. deepdoctection/extern/tp/tpfrcnn/modeling/model_cascade.py +8 -5
  52. deepdoctection/extern/tp/tpfrcnn/modeling/model_fpn.py +16 -11
  53. deepdoctection/extern/tp/tpfrcnn/modeling/model_frcnn.py +17 -10
  54. deepdoctection/extern/tp/tpfrcnn/modeling/model_mrcnn.py +14 -8
  55. deepdoctection/extern/tp/tpfrcnn/modeling/model_rpn.py +15 -10
  56. deepdoctection/extern/tp/tpfrcnn/predict.py +9 -4
  57. deepdoctection/extern/tp/tpfrcnn/preproc.py +7 -3
  58. deepdoctection/extern/tp/tpfrcnn/utils/__init__.py +20 -0
  59. deepdoctection/extern/tp/tpfrcnn/utils/box_ops.py +10 -2
  60. deepdoctection/extern/tpdetect.py +5 -8
  61. deepdoctection/mapper/__init__.py +3 -8
  62. deepdoctection/mapper/d2struct.py +8 -6
  63. deepdoctection/mapper/hfstruct.py +6 -1
  64. deepdoctection/mapper/laylmstruct.py +163 -20
  65. deepdoctection/mapper/maputils.py +3 -1
  66. deepdoctection/mapper/misc.py +6 -3
  67. deepdoctection/mapper/tpstruct.py +2 -2
  68. deepdoctection/pipe/__init__.py +1 -1
  69. deepdoctection/pipe/common.py +11 -9
  70. deepdoctection/pipe/concurrency.py +2 -1
  71. deepdoctection/pipe/layout.py +3 -1
  72. deepdoctection/pipe/lm.py +32 -64
  73. deepdoctection/pipe/order.py +142 -35
  74. deepdoctection/pipe/refine.py +8 -14
  75. deepdoctection/pipe/{cell.py → sub_layout.py} +1 -1
  76. deepdoctection/train/__init__.py +6 -12
  77. deepdoctection/train/d2_frcnn_train.py +21 -16
  78. deepdoctection/train/hf_detr_train.py +18 -11
  79. deepdoctection/train/hf_layoutlm_train.py +118 -101
  80. deepdoctection/train/tp_frcnn_train.py +21 -19
  81. deepdoctection/utils/env_info.py +41 -117
  82. deepdoctection/utils/logger.py +1 -0
  83. deepdoctection/utils/mocks.py +93 -0
  84. deepdoctection/utils/settings.py +1 -0
  85. deepdoctection/utils/viz.py +4 -3
  86. {deepdoctection-0.31.dist-info → deepdoctection-0.32.dist-info}/METADATA +27 -18
  87. deepdoctection-0.32.dist-info/RECORD +146 -0
  88. deepdoctection-0.31.dist-info/RECORD +0 -144
  89. {deepdoctection-0.31.dist-info → deepdoctection-0.32.dist-info}/LICENSE +0 -0
  90. {deepdoctection-0.31.dist-info → deepdoctection-0.32.dist-info}/WHEEL +0 -0
  91. {deepdoctection-0.31.dist-info → deepdoctection-0.32.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,225 @@
1
+ # -*- coding: utf-8 -*-
2
+ # File: hfml.py
3
+
4
+ # Copyright 2024 Dr. Janis Meyer. All rights reserved.
5
+ #
6
+ # Licensed under the Apache License, Version 2.0 (the "License");
7
+ # you may not use this file except in compliance with the License.
8
+ # You may obtain a copy of the License at
9
+ #
10
+ # http://www.apache.org/licenses/LICENSE-2.0
11
+ #
12
+ # Unless required by applicable law or agreed to in writing, software
13
+ # distributed under the License is distributed on an "AS IS" BASIS,
14
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15
+ # See the License for the specific language governing permissions and
16
+ # limitations under the License.
17
+
18
+ """
19
+ Wrapper for the Hugging Face Language Model for sequence and token classification
20
+ """
21
+ from __future__ import annotations
22
+
23
+ from abc import ABC
24
+ from copy import copy
25
+ from pathlib import Path
26
+ from typing import Any, List, Literal, Mapping, Optional, Tuple, Union
27
+
28
+ from lazy_imports import try_import
29
+
30
+ from ..utils.detection_types import JsonDict, Requirement
31
+ from ..utils.file_utils import get_pytorch_requirement, get_transformers_requirement
32
+ from ..utils.settings import TypeOrStr
33
+ from .base import LMSequenceClassifier, SequenceClassResult
34
+ from .hflayoutlm import get_tokenizer_from_model_class
35
+ from .pt.ptutils import get_torch_device
36
+
37
+ with try_import() as pt_import_guard:
38
+ import torch
39
+ import torch.nn.functional as F
40
+
41
+ with try_import() as tr_import_guard:
42
+ from transformers import PretrainedConfig, XLMRobertaForSequenceClassification
43
+
44
+
45
+ def predict_sequence_classes(
46
+ input_ids: torch.Tensor,
47
+ attention_mask: torch.Tensor,
48
+ token_type_ids: torch.Tensor,
49
+ model: Union[XLMRobertaForSequenceClassification],
50
+ ) -> SequenceClassResult:
51
+ """
52
+ :param input_ids: Token converted to ids to be taken from LayoutLMTokenizer
53
+ :param attention_mask: The associated attention masks from padded sequences taken from LayoutLMTokenizer
54
+ :param token_type_ids: Torch tensor of token type ids taken from LayoutLMTokenizer
55
+ :param model: layoutlm model for sequence classification
56
+ :return: SequenceClassResult
57
+ """
58
+
59
+ outputs = model(input_ids=input_ids, attention_mask=attention_mask, token_type_ids=token_type_ids)
60
+
61
+ score = torch.max(F.softmax(outputs.logits)).tolist()
62
+ sequence_class_predictions = outputs.logits.argmax(-1).squeeze().tolist()
63
+
64
+ return SequenceClassResult(class_id=sequence_class_predictions, score=float(score)) # type: ignore
65
+
66
+
67
+ class HFLmSequenceClassifierBase(LMSequenceClassifier, ABC):
68
+ """
69
+ Abstract base class for wrapping Bert-type models for sequence classification into the deepdoctection framework.
70
+ """
71
+
72
+ model: Union[XLMRobertaForSequenceClassification]
73
+
74
+ def __init__(
75
+ self,
76
+ path_config_json: str,
77
+ path_weights: str,
78
+ categories: Mapping[str, TypeOrStr],
79
+ device: Optional[Union[Literal["cpu", "cuda"], torch.device]] = None,
80
+ use_xlm_tokenizer: bool = False,
81
+ ):
82
+ self.path_config = path_config_json
83
+ self.path_weights = path_weights
84
+ self.categories = copy(categories) # type: ignore
85
+
86
+ self.device = get_torch_device(device)
87
+ self.model.to(self.device)
88
+ self.model.config.tokenizer_class = self.get_tokenizer_class_name(use_xlm_tokenizer)
89
+
90
+ @classmethod
91
+ def get_requirements(cls) -> List[Requirement]:
92
+ return [get_pytorch_requirement(), get_transformers_requirement()]
93
+
94
+ def clone(self) -> HFLmSequenceClassifierBase:
95
+ return self.__class__(self.path_config, self.path_weights, self.categories, self.device)
96
+
97
+ def _validate_encodings(
98
+ self, **encodings: Union[List[List[str]], torch.Tensor]
99
+ ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]:
100
+ input_ids = encodings.get("input_ids")
101
+ attention_mask = encodings.get("attention_mask")
102
+ token_type_ids = encodings.get("token_type_ids")
103
+
104
+ if isinstance(input_ids, torch.Tensor):
105
+ input_ids = input_ids.to(self.device)
106
+ else:
107
+ raise ValueError(f"input_ids must be list but is {type(input_ids)}")
108
+ if isinstance(attention_mask, torch.Tensor):
109
+ attention_mask = attention_mask.to(self.device)
110
+ else:
111
+ raise ValueError(f"attention_mask must be list but is {type(attention_mask)}")
112
+ if isinstance(token_type_ids, torch.Tensor):
113
+ token_type_ids = token_type_ids.to(self.device)
114
+ else:
115
+ raise ValueError(f"token_type_ids must be list but is {type(token_type_ids)}")
116
+
117
+ input_ids = input_ids.to(self.device)
118
+ attention_mask = attention_mask.to(self.device)
119
+ token_type_ids = token_type_ids.to(self.device)
120
+ return input_ids, attention_mask, token_type_ids
121
+
122
+ @staticmethod
123
+ def get_name(path_weights: str, architecture: str) -> str:
124
+ """Returns the name of the model"""
125
+ return f"Transformers_{architecture}_" + "_".join(Path(path_weights).parts[-2:])
126
+
127
+ def get_tokenizer_class_name(self, use_xlm_tokenizer: bool) -> str:
128
+ """A refinement for adding the tokenizer class name to the model configs.
129
+
130
+ :param use_xlm_tokenizer: Whether to use a XLM tokenizer.
131
+ """
132
+ tokenizer = get_tokenizer_from_model_class(self.model.__class__.__name__, use_xlm_tokenizer)
133
+ return tokenizer.__class__.__name__
134
+
135
+ @staticmethod
136
+ def image_to_raw_features_mapping() -> str:
137
+ """Returns the mapping function to convert images into raw features."""
138
+ return "image_to_raw_lm_features"
139
+
140
+ @staticmethod
141
+ def image_to_features_mapping() -> str:
142
+ """Returns the mapping function to convert images into features."""
143
+ return "image_to_lm_features"
144
+
145
+
146
+ class HFLmSequenceClassifier(HFLmSequenceClassifierBase):
147
+ """
148
+ A wrapper class for `transformers.XLMRobertaForSequenceClassification` and similar models to use within a pipeline
149
+ component. Check <https://huggingface.co/docs/transformers/model_doc/xlm-roberta> for documentation of the
150
+ model itself.
151
+ Note that this model is equipped with a head that is only useful for classifying the input sequence. For token
152
+ classification and other things please use another model of the family.
153
+
154
+ **Example**
155
+
156
+ # setting up compulsory ocr service
157
+ tesseract_config_path = ModelCatalog.get_full_path_configs("/dd/conf_tesseract.yaml")
158
+ tess = TesseractOcrDetector(tesseract_config_path)
159
+ ocr_service = TextExtractionService(tess)
160
+
161
+ # hf tokenizer and token classifier
162
+ tokenizer = XLMRobertaTokenizerFast.from_pretrained("FacebookAI/xlm-roberta-base")
163
+ roberta = HFLmSequenceClassifier("path/to/config.json","path/to/model.bin",
164
+ categories=["handwritten", "presentation", "resume"])
165
+
166
+ # token classification service
167
+ roberta_service = LMSequenceClassifierService(tokenizer,roberta)
168
+
169
+ pipe = DoctectionPipe(pipeline_component_list=[ocr_service,roberta_service])
170
+
171
+ path = "path/to/some/form"
172
+ df = pipe.analyze(path=path)
173
+
174
+ for dp in df:
175
+ ...
176
+ """
177
+
178
+ def __init__(
179
+ self,
180
+ path_config_json: str,
181
+ path_weights: str,
182
+ categories: Mapping[str, TypeOrStr],
183
+ device: Optional[Union[Literal["cpu", "cuda"], torch.device]] = None,
184
+ use_xlm_tokenizer: bool = True,
185
+ ):
186
+ self.name = self.get_name(path_weights, "bert-like")
187
+ self.model_id = self.get_model_id()
188
+ self.model = self.get_wrapped_model(path_config_json, path_weights)
189
+ super().__init__(path_config_json, path_weights, categories, device, use_xlm_tokenizer)
190
+
191
+ def predict(self, **encodings: Union[List[List[str]], torch.Tensor]) -> SequenceClassResult:
192
+ input_ids, attention_mask, token_type_ids = self._validate_encodings(**encodings)
193
+
194
+ result = predict_sequence_classes(
195
+ input_ids,
196
+ attention_mask,
197
+ token_type_ids,
198
+ self.model,
199
+ )
200
+
201
+ result.class_id += 1
202
+ result.class_name = self.categories[str(result.class_id)]
203
+ return result
204
+
205
+ @staticmethod
206
+ def get_wrapped_model(path_config_json: str, path_weights: str) -> Any:
207
+ """
208
+ Get the inner (wrapped) model.
209
+
210
+ :param path_config_json: path to .json config file
211
+ :param path_weights: path to model artifact
212
+ :return: 'nn.Module'
213
+ """
214
+ config = PretrainedConfig.from_pretrained(pretrained_model_name_or_path=path_config_json)
215
+ return XLMRobertaForSequenceClassification.from_pretrained(
216
+ pretrained_model_name_or_path=path_weights, config=config
217
+ )
218
+
219
+ @staticmethod
220
+ def default_kwargs_for_input_mapping() -> JsonDict:
221
+ """
222
+ Add some default arguments that might be necessary when preparing a sample. Overwrite this method
223
+ for some custom setting.
224
+ """
225
+ return {}
@@ -185,25 +185,6 @@ class ModelCatalog:
185
185
  dl_library="TF",
186
186
  model_wrapper="TPFrcnnDetector",
187
187
  ),
188
- "layout/d2_model-800000-layout.pkl": ModelProfile(
189
- name="layout/d2_model-800000-layout.pkl",
190
- description="Detectron2 layout detection model trained on Publaynet",
191
- config="dd/d2/layout/CASCADE_RCNN_R_50_FPN_GN.yaml",
192
- size=[274568239],
193
- tp_model=False,
194
- hf_repo_id="deepdoctection/d2_casc_rcnn_X_32xd4_50_FPN_GN_2FC_publaynet_inference_only",
195
- hf_model_name="d2_model-800000-layout.pkl",
196
- hf_config_file=["Base-RCNN-FPN.yaml", "CASCADE_RCNN_R_50_FPN_GN.yaml"],
197
- categories={
198
- "1": LayoutType.text,
199
- "2": LayoutType.title,
200
- "3": LayoutType.list,
201
- "4": LayoutType.table,
202
- "5": LayoutType.figure,
203
- },
204
- dl_library="PT",
205
- model_wrapper="D2FrcnnDetector",
206
- ),
207
188
  "layout/d2_model_0829999_layout_inf_only.pt": ModelProfile(
208
189
  name="layout/d2_model_0829999_layout_inf_only.pt",
209
190
  description="Detectron2 layout detection model trained on Publaynet",
@@ -261,19 +242,6 @@ class ModelCatalog:
261
242
  dl_library="PT",
262
243
  model_wrapper="D2FrcnnTracingDetector",
263
244
  ),
264
- "cell/d2_model-1800000-cell.pkl": ModelProfile(
265
- name="cell/d2_model-1800000-cell.pkl",
266
- description="Detectron2 cell detection inference only model trained on Pubtabnet",
267
- config="dd/d2/cell/CASCADE_RCNN_R_50_FPN_GN.yaml",
268
- size=[274519039],
269
- tp_model=False,
270
- hf_repo_id="deepdoctection/d2_casc_rcnn_X_32xd4_50_FPN_GN_2FC_pubtabnet_c_inference_only",
271
- hf_model_name="d2_model-1800000-cell.pkl",
272
- hf_config_file=["Base-RCNN-FPN.yaml", "CASCADE_RCNN_R_50_FPN_GN.yaml"],
273
- categories={"1": LayoutType.cell},
274
- dl_library="PT",
275
- model_wrapper="D2FrcnnDetector",
276
- ),
277
245
  "cell/d2_model_1849999_cell_inf_only.pt": ModelProfile(
278
246
  name="cell/d2_model_1849999_cell_inf_only.pt",
279
247
  description="Detectron2 cell detection inference only model trained on Pubtabnet",
@@ -313,19 +281,6 @@ class ModelCatalog:
313
281
  dl_library="PT",
314
282
  model_wrapper="D2FrcnnDetector",
315
283
  ),
316
- "item/d2_model-1620000-item.pkl": ModelProfile(
317
- name="item/d2_model-1620000-item.pkl",
318
- description="Detectron2 item detection inference only model trained on Pubtabnet",
319
- config="dd/d2/item/CASCADE_RCNN_R_50_FPN_GN.yaml",
320
- size=[274531339],
321
- tp_model=False,
322
- hf_repo_id="deepdoctection/d2_casc_rcnn_X_32xd4_50_FPN_GN_2FC_pubtabnet_rc_inference_only",
323
- hf_model_name="d2_model-1620000-item.pkl",
324
- hf_config_file=["Base-RCNN-FPN.yaml", "CASCADE_RCNN_R_50_FPN_GN.yaml"],
325
- categories={"1": LayoutType.row, "2": LayoutType.column},
326
- dl_library="PT",
327
- model_wrapper="D2FrcnnDetector",
328
- ),
329
284
  "item/d2_model_1639999_item.pth": ModelProfile(
330
285
  name="item/d2_model_1639999_item.pth",
331
286
  description="Detectron2 item detection model trained on Pubtabnet",
@@ -365,6 +320,45 @@ class ModelCatalog:
365
320
  dl_library="PT",
366
321
  model_wrapper="D2FrcnnTracingDetector",
367
322
  ),
323
+ "nielsr/lilt-xlm-roberta-base/pytorch_model.bin": ModelProfile(
324
+ name="nielsr/lilt-xlm-roberta-base/pytorch_model.bin",
325
+ description="LiLT build with a RobertaXLM base model",
326
+ config="nielsr/lilt-xlm-roberta-base/config.json",
327
+ size=[1136743583],
328
+ tp_model=False,
329
+ hf_repo_id="nielsr/lilt-xlm-roberta-base",
330
+ hf_model_name="pytorch_model.bin",
331
+ hf_config_file=["config.json"],
332
+ dl_library="PT",
333
+ ),
334
+ "SCUT-DLVCLab/lilt-infoxlm-base/pytorch_model.bin": ModelProfile(
335
+ name="SCUT-DLVCLab/lilt-infoxlm-base/pytorch_model.bin",
336
+ description="Language-Independent Layout Transformer - InfoXLM model by stitching a pre-trained InfoXLM"
337
+ " and a pre-trained Language-Independent Layout Transformer (LiLT) together. It was introduced"
338
+ " in the paper LiLT: A Simple yet Effective Language-Independent Layout Transformer for"
339
+ " Structured Document Understanding by Wang et al. and first released in this repository.",
340
+ config="SCUT-DLVCLab/lilt-infoxlm-base/config.json",
341
+ size=[1136743583],
342
+ tp_model=False,
343
+ hf_repo_id="SCUT-DLVCLab/lilt-infoxlm-base",
344
+ hf_model_name="pytorch_model.bin",
345
+ hf_config_file=["config.json"],
346
+ dl_library="PT",
347
+ ),
348
+ "SCUT-DLVCLab/lilt-roberta-en-base/pytorch_model.bin": ModelProfile(
349
+ name="SCUT-DLVCLab/lilt-roberta-en-base/pytorch_model.bin",
350
+ description="Language-Independent Layout Transformer - RoBERTa model by stitching a pre-trained RoBERTa"
351
+ " (English) and a pre-trained Language-Independent Layout Transformer (LiLT) together. It was"
352
+ " introduced in the paper LiLT: A Simple yet Effective Language-Independent Layout Transformer"
353
+ " for Structured Document Understanding by Wang et al. and first released in this repository.",
354
+ config="SCUT-DLVCLab/lilt-roberta-en-base/config.json",
355
+ size=[523151519],
356
+ tp_model=False,
357
+ hf_repo_id="SCUT-DLVCLab/lilt-roberta-en-base",
358
+ hf_model_name="pytorch_model.bin",
359
+ hf_config_file=["config.json"],
360
+ dl_library="PT",
361
+ ),
368
362
  "microsoft/layoutlm-base-uncased/pytorch_model.bin": ModelProfile(
369
363
  name="microsoft/layoutlm-base-uncased/pytorch_model.bin",
370
364
  description="LayoutLM is a simple but effective pre-training method of text and layout for document image"
@@ -535,6 +529,19 @@ class ModelCatalog:
535
529
  model_wrapper="DoctrTextRecognizer",
536
530
  architecture="crnn_vgg16_bn",
537
531
  ),
532
+ "FacebookAI/xlm-roberta-base": ModelProfile(
533
+ name="FacebookAI/xlm-roberta-base/pytorch_model.bin",
534
+ description="XLM-RoBERTa model pre-trained on 2.5TB of filtered CommonCrawl data containing 100 languages."
535
+ " It was introduced in the paper Unsupervised Cross-lingual Representation Learning at Scale"
536
+ " by Conneau et al. and first released in this repository.",
537
+ size=[1115590446],
538
+ tp_model=False,
539
+ config="FacebookAI/xlm-roberta-base/config.json",
540
+ hf_repo_id="FacebookAI/xlm-roberta-base",
541
+ hf_model_name="pytorch_model.bin",
542
+ hf_config_file=["config.json"],
543
+ dl_library="PT",
544
+ ),
538
545
  "fasttext/lid.176.bin": ModelProfile(
539
546
  name="fasttext/lid.176.bin",
540
547
  description="Fasttext language detection model",
@@ -980,9 +987,11 @@ class ModelDownloadManager:
980
987
  else:
981
988
  file_names.append(model_name)
982
989
  if profile.hf_repo_id:
983
- ModelDownloadManager.load_model_from_hf_hub(profile, absolute_path_weights, file_names)
990
+ if not os.path.isfile(absolute_path_weights):
991
+ ModelDownloadManager.load_model_from_hf_hub(profile, absolute_path_weights, file_names)
984
992
  absolute_path_configs = ModelCatalog.get_full_path_configs(name)
985
- ModelDownloadManager.load_configs_from_hf_hub(profile, absolute_path_configs)
993
+ if not os.path.isfile(absolute_path_configs):
994
+ ModelDownloadManager.load_configs_from_hf_hub(profile, absolute_path_configs)
986
995
  else:
987
996
  ModelDownloadManager._load_from_gd(profile, absolute_path_weights, file_names)
988
997
 
@@ -21,13 +21,15 @@ PDFPlumber text extraction engine
21
21
 
22
22
  from typing import Dict, List, Tuple
23
23
 
24
+ from lazy_imports import try_import
25
+
24
26
  from ..utils.context import save_tmp_file
25
27
  from ..utils.detection_types import Requirement
26
- from ..utils.file_utils import get_pdfplumber_requirement, pdfplumber_available
28
+ from ..utils.file_utils import get_pdfplumber_requirement
27
29
  from ..utils.settings import LayoutType, ObjectTypes
28
30
  from .base import DetectionResult, PdfMiner
29
31
 
30
- if pdfplumber_available():
32
+ with try_import() as import_guard:
31
33
  from pdfplumber.pdf import PDF
32
34
 
33
35
 
@@ -64,10 +66,12 @@ class PdfPlumberTextDetector(PdfMiner):
64
66
 
65
67
  """
66
68
 
67
- def __init__(self) -> None:
69
+ def __init__(self, x_tolerance: int = 3, y_tolerance: int = 3) -> None:
68
70
  self.name = "Pdfplumber"
69
71
  self.model_id = self.get_model_id()
70
72
  self.categories = {"1": LayoutType.word}
73
+ self.x_tolerance = x_tolerance
74
+ self.y_tolerance = y_tolerance
71
75
 
72
76
  def predict(self, pdf_bytes: bytes) -> List[DetectionResult]:
73
77
  """
@@ -82,7 +86,7 @@ class PdfPlumberTextDetector(PdfMiner):
82
86
  _pdf = PDF(fin)
83
87
  self._page = _pdf.pages[0]
84
88
  self._pdf_bytes = pdf_bytes
85
- words = self._page.extract_words()
89
+ words = self._page.extract_words(x_tolerance=self.x_tolerance, y_tolerance=self.y_tolerance)
86
90
  detect_results = list(map(_to_detect_result, words))
87
91
  return detect_results
88
92
 
@@ -19,7 +19,5 @@
19
19
  Init file for pytorch compatibility package
20
20
  """
21
21
 
22
+ from .nms import *
22
23
  from .ptutils import *
23
-
24
- if pytorch_available():
25
- from .nms import *
@@ -18,9 +18,13 @@
18
18
  """
19
19
  Module for custom NMS functions.
20
20
  """
21
+ from __future__ import annotations
21
22
 
22
- import torch
23
- from torchvision.ops import boxes as box_ops # type: ignore
23
+ from lazy_imports import try_import
24
+
25
+ with try_import() as import_guard:
26
+ import torch
27
+ from torchvision.ops import boxes as box_ops # type: ignore
24
28
 
25
29
 
26
30
  # Copy & paste from https://github.com/facebookresearch/detectron2/blob/main/detectron2/layers/nms.py
@@ -18,32 +18,40 @@
18
18
  """
19
19
  Torch related utils
20
20
  """
21
+ from __future__ import annotations
21
22
 
23
+ import os
24
+ from typing import Optional, Union
22
25
 
23
- from ...utils.error import DependencyError
24
- from ...utils.file_utils import pytorch_available
26
+ from lazy_imports import try_import
25
27
 
28
+ with try_import() as import_guard:
29
+ import torch
26
30
 
27
- def set_torch_auto_device() -> "torch.device": # type: ignore
28
- """
29
- Returns cuda device if available, otherwise cpu
31
+
32
+ def get_torch_device(device: Optional[Union[str, torch.device]] = None) -> torch.device:
30
33
  """
31
- if pytorch_available():
32
- from torch import cuda, device # pylint: disable=C0415
34
+ Selecting a device on which to load a model. The selection follows a cascade of priorities:
33
35
 
34
- return device("cuda" if cuda.is_available() else "cpu")
35
- raise DependencyError("Pytorch must be installed")
36
+ - If a device string is provided, it is used.
37
+ - If the environment variable "USE_CUDA" is set, a GPU is used. If more GPUs are available, it will use all of them
38
+ unless something else is specified by CUDA_VISIBLE_DEVICES:
36
39
 
40
+ https://stackoverflow.com/questions/54216920/how-to-use-multiple-gpus-in-pytorch
37
41
 
38
- def get_num_gpu() -> int:
39
- """
40
- Returns number of CUDA devices if pytorch is available
42
+ - If an MPS device is available, it is used.
43
+ - Otherwise, the CPU is used.
41
44
 
42
- :return:
45
+ :param device: Device either as string or torch.device
46
+ :return: Tensorflow device
43
47
  """
44
-
45
- if pytorch_available():
46
- from torch import cuda # pylint: disable=C0415
47
-
48
- return cuda.device_count()
49
- raise DependencyError("Pytorch must be installed")
48
+ if device is not None:
49
+ if isinstance(device, torch.device):
50
+ return device
51
+ if isinstance(device, str):
52
+ return torch.device(device)
53
+ if os.environ.get("USE_CUDA"):
54
+ return torch.device("cuda")
55
+ if os.environ.get("USE_MPS"):
56
+ return torch.device("mps")
57
+ return torch.device("cpu")
@@ -23,14 +23,16 @@ import sys
23
23
  import traceback
24
24
  from typing import List
25
25
 
26
+ from lazy_imports import try_import
27
+
26
28
  from ..datapoint.convert import convert_np_array_to_b64_b
27
29
  from ..utils.detection_types import ImageType, JsonDict, Requirement
28
- from ..utils.file_utils import boto3_available, get_boto3_requirement
30
+ from ..utils.file_utils import get_boto3_requirement
29
31
  from ..utils.logger import LoggingRecord, logger
30
32
  from ..utils.settings import LayoutType, ObjectTypes
31
33
  from .base import DetectionResult, ObjectDetector, PredictorBase
32
34
 
33
- if boto3_available():
35
+ with try_import() as import_guard:
34
36
  import boto3 # type:ignore
35
37
 
36
38
 
@@ -19,7 +19,18 @@
19
19
  Tensorflow related utils.
20
20
  """
21
21
 
22
- from tensorpack.models import disable_layer_logging # pylint: disable=E0401
22
+ from __future__ import annotations
23
+
24
+ import os
25
+ from typing import Optional, Union, ContextManager
26
+
27
+ from lazy_imports import try_import
28
+
29
+ with try_import() as import_guard:
30
+ from tensorpack.models import disable_layer_logging # pylint: disable=E0401
31
+
32
+ with try_import() as tf_import_guard:
33
+ import tensorflow as tf # pylint: disable=E0401
23
34
 
24
35
 
25
36
  def is_tfv2() -> bool:
@@ -38,16 +49,13 @@ def disable_tfv2() -> bool:
38
49
  """
39
50
  Disable TF in V2 mode.
40
51
  """
41
- try:
42
- import tensorflow as tf # pylint: disable=C0415
43
52
 
44
- tfv1 = tf.compat.v1
45
- if is_tfv2():
46
- tfv1.disable_v2_behavior()
47
- tfv1.disable_eager_execution()
53
+ tfv1 = tf.compat.v1
54
+ if is_tfv2():
55
+ tfv1.disable_v2_behavior()
56
+ tfv1.disable_eager_execution()
48
57
  return True
49
- except ModuleNotFoundError:
50
- return False
58
+ return False
51
59
 
52
60
 
53
61
  def disable_tp_layer_logging() -> None:
@@ -55,3 +63,29 @@ def disable_tp_layer_logging() -> None:
55
63
  Disables TP layer logging, if not already set
56
64
  """
57
65
  disable_layer_logging()
66
+
67
+
68
+ def get_tf_device(device: Optional[Union[str, tf.device]] = None) -> tf.device:
69
+ """
70
+ Selecting a device on which to load a model. The selection follows a cascade of priorities:
71
+
72
+ - If a device string is provided, it is used. If the string is "cuda" or "GPU", the first GPU is used.
73
+ - If the environment variable "USE_CUDA" is set, a GPU is used. If more GPUs are available it will use the first one
74
+
75
+ :param device: Device string
76
+ :return: Tensorflow device
77
+ """
78
+ if device is not None:
79
+ if isinstance(device, ContextManager):
80
+ return device
81
+ if isinstance(device, str):
82
+ if device in ("cuda", "GPU"):
83
+ device_names = [device.name for device in tf.config.list_logical_devices(device_type="GPU")]
84
+ return tf.device(device_names[0].name)
85
+ # The input must be something sensible
86
+ return tf.device(device)
87
+ if os.environ.get("USE_CUDA"):
88
+ device_names = [device.name for device in tf.config.list_logical_devices(device_type="GPU")]
89
+ return tf.device(device_names[0])
90
+ device_names = [device.name for device in tf.config.list_logical_devices(device_type="CPU")]
91
+ return tf.device(device_names[0])
@@ -18,21 +18,24 @@
18
18
  """
19
19
  Compatibility classes and methods related to Tensorpack package
20
20
  """
21
+ from __future__ import annotations
21
22
 
22
23
  from abc import ABC, abstractmethod
23
24
  from typing import Any, List, Mapping, Tuple, Union
24
25
 
25
- from tensorpack.predict import OfflinePredictor, PredictConfig # pylint: disable=E0401
26
- from tensorpack.tfutils import SmartInit # pylint: disable=E0401
27
-
28
- # pylint: disable=import-error
29
- from tensorpack.train.model_desc import ModelDesc
30
- from tensorpack.utils.gpu import get_num_gpu
26
+ from lazy_imports import try_import
31
27
 
32
28
  from ...utils.metacfg import AttrDict
33
29
  from ...utils.settings import ObjectTypes
34
30
 
35
- # pylint: enable=import-error
31
+ with try_import() as import_guard:
32
+ from tensorpack.predict import OfflinePredictor, PredictConfig # pylint: disable=E0401
33
+ from tensorpack.tfutils import SmartInit # pylint: disable=E0401
34
+ from tensorpack.train.model_desc import ModelDesc # pylint: disable=E0401
35
+ from tensorpack.utils.gpu import get_num_gpu # pylint: disable=E0401
36
+
37
+ if not import_guard.is_successful():
38
+ from ...utils.mocks import ModelDesc
36
39
 
37
40
 
38
41
  class ModelDescWithConfig(ModelDesc, ABC): # type: ignore
@@ -0,0 +1,20 @@
1
+ # -*- coding: utf-8 -*-
2
+ # File: __init__.py
3
+
4
+ # Copyright 2021 Dr. Janis Meyer. All rights reserved.
5
+ #
6
+ # Licensed under the Apache License, Version 2.0 (the "License");
7
+ # you may not use this file except in compliance with the License.
8
+ # You may obtain a copy of the License at
9
+ #
10
+ # http://www.apache.org/licenses/LICENSE-2.0
11
+ #
12
+ # Unless required by applicable law or agreed to in writing, software
13
+ # distributed under the License is distributed on an "AS IS" BASIS,
14
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15
+ # See the License for the specific language governing permissions and
16
+ # limitations under the License.
17
+
18
+ """
19
+ Init file for code for Tensorpack FRCNN example
20
+ """
@@ -11,13 +11,17 @@ This file is modified from
11
11
 
12
12
 
13
13
  import numpy as np
14
- from tensorpack.dataflow.imgaug import ImageAugmentor, ResizeTransform # pylint: disable=E0401
14
+ from lazy_imports import try_import
15
15
 
16
- from ....utils.file_utils import cocotools_available
16
+ with try_import() as import_guard:
17
+ from tensorpack.dataflow.imgaug import ImageAugmentor, ResizeTransform # pylint: disable=E0401
17
18
 
18
- if cocotools_available():
19
+ with try_import() as cc_import_guard:
19
20
  import pycocotools.mask as coco_mask
20
21
 
22
+ if not import_guard.is_successful():
23
+ from ....utils.mocks import ImageAugmentor
24
+
21
25
 
22
26
  class CustomResize(ImageAugmentor):
23
27
  """