deepdoctection 0.42.1__py3-none-any.whl → 0.43__py3-none-any.whl
This diff shows the changes between the two publicly released package versions as they appear in their public registry. It is provided for informational purposes only.
- deepdoctection/__init__.py +2 -1
- deepdoctection/analyzer/__init__.py +2 -1
- deepdoctection/analyzer/config.py +904 -0
- deepdoctection/analyzer/dd.py +36 -62
- deepdoctection/analyzer/factory.py +311 -141
- deepdoctection/configs/conf_dd_one.yaml +100 -44
- deepdoctection/configs/profiles.jsonl +32 -0
- deepdoctection/dataflow/__init__.py +9 -6
- deepdoctection/dataflow/base.py +33 -15
- deepdoctection/dataflow/common.py +96 -75
- deepdoctection/dataflow/custom.py +36 -29
- deepdoctection/dataflow/custom_serialize.py +135 -91
- deepdoctection/dataflow/parallel_map.py +33 -31
- deepdoctection/dataflow/serialize.py +15 -10
- deepdoctection/dataflow/stats.py +41 -28
- deepdoctection/datapoint/__init__.py +4 -6
- deepdoctection/datapoint/annotation.py +104 -66
- deepdoctection/datapoint/box.py +190 -130
- deepdoctection/datapoint/convert.py +66 -39
- deepdoctection/datapoint/image.py +151 -95
- deepdoctection/datapoint/view.py +383 -236
- deepdoctection/datasets/__init__.py +2 -6
- deepdoctection/datasets/adapter.py +11 -11
- deepdoctection/datasets/base.py +118 -81
- deepdoctection/datasets/dataflow_builder.py +18 -12
- deepdoctection/datasets/info.py +76 -57
- deepdoctection/datasets/instances/__init__.py +6 -2
- deepdoctection/datasets/instances/doclaynet.py +17 -14
- deepdoctection/datasets/instances/fintabnet.py +16 -22
- deepdoctection/datasets/instances/funsd.py +11 -6
- deepdoctection/datasets/instances/iiitar13k.py +9 -9
- deepdoctection/datasets/instances/layouttest.py +9 -9
- deepdoctection/datasets/instances/publaynet.py +9 -9
- deepdoctection/datasets/instances/pubtables1m.py +13 -13
- deepdoctection/datasets/instances/pubtabnet.py +13 -15
- deepdoctection/datasets/instances/rvlcdip.py +8 -8
- deepdoctection/datasets/instances/xfund.py +11 -9
- deepdoctection/datasets/registry.py +18 -11
- deepdoctection/datasets/save.py +12 -11
- deepdoctection/eval/__init__.py +3 -2
- deepdoctection/eval/accmetric.py +72 -52
- deepdoctection/eval/base.py +29 -10
- deepdoctection/eval/cocometric.py +14 -12
- deepdoctection/eval/eval.py +56 -41
- deepdoctection/eval/registry.py +6 -3
- deepdoctection/eval/tedsmetric.py +24 -9
- deepdoctection/eval/tp_eval_callback.py +13 -12
- deepdoctection/extern/__init__.py +1 -1
- deepdoctection/extern/base.py +176 -97
- deepdoctection/extern/d2detect.py +127 -92
- deepdoctection/extern/deskew.py +19 -10
- deepdoctection/extern/doctrocr.py +157 -106
- deepdoctection/extern/fastlang.py +25 -17
- deepdoctection/extern/hfdetr.py +137 -60
- deepdoctection/extern/hflayoutlm.py +329 -248
- deepdoctection/extern/hflm.py +67 -33
- deepdoctection/extern/model.py +108 -762
- deepdoctection/extern/pdftext.py +37 -12
- deepdoctection/extern/pt/nms.py +15 -1
- deepdoctection/extern/pt/ptutils.py +13 -9
- deepdoctection/extern/tessocr.py +87 -54
- deepdoctection/extern/texocr.py +29 -14
- deepdoctection/extern/tp/tfutils.py +36 -8
- deepdoctection/extern/tp/tpcompat.py +54 -16
- deepdoctection/extern/tp/tpfrcnn/config/config.py +20 -4
- deepdoctection/extern/tpdetect.py +4 -2
- deepdoctection/mapper/__init__.py +1 -1
- deepdoctection/mapper/cats.py +117 -76
- deepdoctection/mapper/cocostruct.py +35 -17
- deepdoctection/mapper/d2struct.py +56 -29
- deepdoctection/mapper/hfstruct.py +32 -19
- deepdoctection/mapper/laylmstruct.py +221 -185
- deepdoctection/mapper/maputils.py +71 -35
- deepdoctection/mapper/match.py +76 -62
- deepdoctection/mapper/misc.py +68 -44
- deepdoctection/mapper/pascalstruct.py +13 -12
- deepdoctection/mapper/prodigystruct.py +33 -19
- deepdoctection/mapper/pubstruct.py +42 -32
- deepdoctection/mapper/tpstruct.py +39 -19
- deepdoctection/mapper/xfundstruct.py +20 -13
- deepdoctection/pipe/__init__.py +1 -2
- deepdoctection/pipe/anngen.py +104 -62
- deepdoctection/pipe/base.py +226 -107
- deepdoctection/pipe/common.py +206 -123
- deepdoctection/pipe/concurrency.py +74 -47
- deepdoctection/pipe/doctectionpipe.py +108 -47
- deepdoctection/pipe/language.py +41 -24
- deepdoctection/pipe/layout.py +45 -18
- deepdoctection/pipe/lm.py +146 -78
- deepdoctection/pipe/order.py +196 -113
- deepdoctection/pipe/refine.py +111 -63
- deepdoctection/pipe/registry.py +1 -1
- deepdoctection/pipe/segment.py +213 -142
- deepdoctection/pipe/sub_layout.py +76 -46
- deepdoctection/pipe/text.py +52 -33
- deepdoctection/pipe/transform.py +8 -6
- deepdoctection/train/d2_frcnn_train.py +87 -69
- deepdoctection/train/hf_detr_train.py +72 -40
- deepdoctection/train/hf_layoutlm_train.py +85 -46
- deepdoctection/train/tp_frcnn_train.py +56 -28
- deepdoctection/utils/concurrency.py +59 -16
- deepdoctection/utils/context.py +40 -19
- deepdoctection/utils/develop.py +25 -17
- deepdoctection/utils/env_info.py +85 -36
- deepdoctection/utils/error.py +16 -10
- deepdoctection/utils/file_utils.py +246 -62
- deepdoctection/utils/fs.py +162 -43
- deepdoctection/utils/identifier.py +29 -16
- deepdoctection/utils/logger.py +49 -32
- deepdoctection/utils/metacfg.py +83 -21
- deepdoctection/utils/pdf_utils.py +119 -62
- deepdoctection/utils/settings.py +24 -10
- deepdoctection/utils/tqdm.py +10 -5
- deepdoctection/utils/transform.py +182 -46
- deepdoctection/utils/utils.py +61 -28
- deepdoctection/utils/viz.py +150 -104
- deepdoctection-0.43.dist-info/METADATA +376 -0
- deepdoctection-0.43.dist-info/RECORD +149 -0
- deepdoctection/analyzer/_config.py +0 -146
- deepdoctection-0.42.1.dist-info/METADATA +0 -431
- deepdoctection-0.42.1.dist-info/RECORD +0 -148
- {deepdoctection-0.42.1.dist-info → deepdoctection-0.43.dist-info}/WHEEL +0 -0
- {deepdoctection-0.42.1.dist-info → deepdoctection-0.43.dist-info}/licenses/LICENSE +0 -0
- {deepdoctection-0.42.1.dist-info → deepdoctection-0.43.dist-info}/top_level.txt +0 -0
deepdoctection/extern/d2detect.py
CHANGED
@@ -16,8 +16,9 @@
 # limitations under the License.
 
 """
-D2 GeneralizedRCNN
+D2 `GeneralizedRCNN` models in PyTorch or Torchscript
 """
+
 from __future__ import annotations
 
 import io
@@ -53,12 +54,14 @@ with try_import() as d2_import_guard:
 
 def _d2_post_processing(predictions: dict[str, Instances], nms_thresh_class_agnostic: float) -> dict[str, Instances]:
     """
-    D2 postprocessing steps
-
+    D2 postprocessing steps. Apply a class agnostic NMS.
+
+    Args:
+        predictions: Prediction outputs from the model.
+        nms_thresh_class_agnostic: Nms being performed over all class predictions
 
-    :
-
-    :return: filtered predictions outputs
+    Returns:
+        filtered Instances
     """
     instances = predictions["instances"]
     class_masks = torch.ones(instances.pred_classes.shape, dtype=torch.uint8)
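For context, class agnostic NMS lets all boxes compete in a single suppression pass, regardless of their predicted class. A minimal standalone sketch of the idea, assuming `torchvision` is available (illustrative only, not code from this diff):

```python
import torch
from torchvision.ops import nms


def class_agnostic_nms(boxes: torch.Tensor, scores: torch.Tensor, thresh: float) -> torch.Tensor:
    """Suppress overlapping boxes across all classes at once.

    boxes: (N, 4) tensor in xyxy format; scores: (N,) confidence scores.
    Returns the indices of the boxes that survive suppression.
    """
    return nms(boxes, scores, thresh)


# Two heavily overlapping boxes that may carry different class labels:
# only the higher-scoring one survives, which is the point of the class agnostic variant.
boxes = torch.tensor([[0.0, 0.0, 100.0, 100.0], [2.0, 2.0, 102.0, 102.0]])
scores = torch.tensor([0.9, 0.8])
keep = class_agnostic_nms(boxes, scores, thresh=0.5)  # tensor([0])
```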
@@ -74,14 +77,17 @@ def d2_predict_image(
     nms_thresh_class_agnostic: float,
 ) -> list[DetectionResult]:
     """
-    Run detection on one image
-
-
-    :
-
-
-
-
+    Run detection on one image. It will also handle the preprocessing internally which is using a custom resizing
+    within some bounds.
+
+    Args:
+        np_img: ndarray
+        predictor: torch nn module implemented in Detectron2
+        resizer: instance for resizing the input image
+        nms_thresh_class_agnostic: class agnostic NMS threshold
+
+    Returns:
+        list of `DetectionResult`s
    """
     height, width = np_img.shape[:2]
     resized_img = resizer.get_transform(np_img).apply_image(np_img)
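The "custom resizing within some bounds" is D2's shortest-edge scheme: scale the shorter side towards `min_size_test` while capping the longer side at `max_size_test`. A hedged sketch of that arithmetic, assuming OpenCV for the actual resize (`InferenceResize` appears to wrap the equivalent logic):

```python
import numpy as np
import cv2  # assumption: OpenCV backend for the resize itself


def resize_shortest_edge(np_img: np.ndarray, min_size: int, max_size: int) -> np.ndarray:
    h, w = np_img.shape[:2]
    scale = min_size / min(h, w)          # bring the shorter side to min_size
    if max(h, w) * scale > max_size:      # but never let the longer side exceed max_size
        scale = max_size / max(h, w)
    new_w, new_h = int(w * scale + 0.5), int(h * scale + 0.5)
    return cv2.resize(np_img, (new_w, new_h), interpolation=cv2.INTER_LINEAR)


# With the bounds seen later in this diff, InferenceResize(800, 1333):
# a 600x1000 page scales by ~1.333 -> roughly 800x1333, the longer side hitting the cap.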
@@ -107,15 +113,18 @@ def d2_jit_predict_image(
     np_img: PixelValues, d2_predictor: nn.Module, resizer: InferenceResize, nms_thresh_class_agnostic: float
 ) -> list[DetectionResult]:
     """
-    Run detection on an image using
-    is using a custom resizing within some bounds. Moreover, and different from the setting where
+    Run detection on an image using Torchscript. It will also handle the preprocessing internally which
+    is using a custom resizing within some bounds. Moreover, and different from the setting where D2 is used
     it will also handle the resizing of the bounding box coords to the original image size.
 
-    :
-
-
-
-
+    Args:
+        np_img: ndarray
+        d2_predictor: torchscript nn module
+        resizer: instance for resizing the input image
+        nms_thresh_class_agnostic: class agnostic nms threshold
+
+    Returns:
+        list of `DetectionResult`s
     """
     height, width = np_img.shape[:2]
     resized_img = resizer.get_transform(np_img).apply_image(np_img)
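The back-mapping of box coordinates to the original image size mentioned in this docstring amounts to a per-axis scale. A minimal sketch (illustrative, not the Torchscript code path):

```python
import numpy as np


def rescale_boxes(boxes: np.ndarray, resized_hw: tuple, original_hw: tuple) -> np.ndarray:
    """Map xyxy boxes predicted on the resized image back to original coordinates."""
    scale_x = original_hw[1] / resized_hw[1]
    scale_y = original_hw[0] / resized_hw[0]
    out = boxes.astype(np.float64).copy()
    out[:, [0, 2]] *= scale_x   # x coordinates
    out[:, [1, 3]] *= scale_y   # y coordinates
    return out
```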
@@ -153,11 +162,14 @@ class D2FrcnnDetectorMixin(ObjectDetector, ABC):
         filter_categories: Optional[Sequence[TypeOrStr]] = None,
     ):
         """
-        :
-
-
-
-
+        Args:
+            categories: A dict with key (indices) and values (category names). Index 0 must be reserved for a
+                        dummy 'BG' category.
+                Note:
+                    This convention is different from the builtin D2 framework, where models in the model
+                    zoo are trained with `BG` class having the highest index.
+            filter_categories: The model might return objects that are not supposed to be predicted and that should
+                               be filtered. Pass a list of category names that must not be returned
         """
 
         self.categories = ModelCategories(init_categories=categories)
@@ -166,10 +178,12 @@ class D2FrcnnDetectorMixin(ObjectDetector, ABC):
 
     def _map_category_names(self, detection_results: list[DetectionResult]) -> list[DetectionResult]:
         """
-        Populating category names to
+        Populating category names to `DetectionResult`s
 
-        :
-
+        Args:
+            detection_results: list of `DetectionResult`s. Will also filter categories
+        Returns:
+            List of `DetectionResult`s with attribute `class_name` populated
         """
         filtered_detection_result: list[DetectionResult] = []
         shifted_categories = self.categories.shift_category_ids(shift_by=-1)
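The `shift_category_ids(shift_by=-1)` call bridges two numbering conventions: the wrapper reserves index 0 for the dummy `BG` category, while the raw model emits 0-based class ids. A small runnable sketch of that lookup (`Result` is a stand-in for `DetectionResult`; the category names are placeholders):

```python
from dataclasses import dataclass


@dataclass
class Result:                 # stand-in for DetectionResult with the fields used here
    class_id: int
    class_name: str = ""


# categories as passed to the wrapper: index 0 implicitly reserved for 'BG'
categories = {1: "text", 2: "title", 3: "table"}
# shift by -1 so the keys line up with the model's 0-based class predictions
shifted = {idx - 1: name for idx, name in categories.items()}

results = [Result(class_id=0), Result(class_id=2)]
for r in results:
    r.class_name = shifted[r.class_id]   # -> "text", "table"
```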
@@ -190,8 +204,9 @@ class D2FrcnnDetectorMixin(ObjectDetector, ABC):
     def get_inference_resizer(min_size_test: int, max_size_test: int) -> InferenceResize:
         """Returns the resizer for the inference
 
-        :
-
+        Args:
+            min_size_test: minimum size of the resized image
+            max_size_test: maximum size of the resized image
         """
         return InferenceResize(min_size_test, max_size_test)
 
@@ -208,19 +223,21 @@ class D2FrcnnDetector(D2FrcnnDetectorMixin):
 
     Currently, masks are not included in the data model.
 
-
-
-
+    Note:
+        There are no adjustment to the original implementation of Detectron2. Only one post-processing step is followed
+        by the standard D2 output that takes into account of the situation that detected objects are disjoint. For more
+        infos on this topic, see <https://github.com/facebookresearch/detectron2/issues/978>.
 
-
-
-
-
+    Example:
+        ```python
+        config_path = ModelCatalog.get_full_path_configs("dd/d2/item/CASCADE_RCNN_R_50_FPN_GN.yaml")
+        weights_path = ModelDownloadManager.maybe_download_weights_and_configs("item/d2_model-800000-layout.pkl")
+        categories = ModelCatalog.get_profile("item/d2_model-800000-layout.pkl").categories
 
-
+        d2_predictor = D2FrcnnDetector(config_path,weights_path,categories,device="cpu")
 
-
-
+        detection_results = d2_predictor.predict(bgr_image_np_array)
+        ```
     """
 
     def __init__(
@@ -238,17 +255,18 @@ class D2FrcnnDetector(D2FrcnnDetectorMixin):
         The configuration of the model uses the full stack of build model tools of D2. For more information
         please check <https://detectron2.readthedocs.io/en/latest/tutorials/models.html#build-models-from-yacs-config>.
 
-        :
-
-
-
-
-
-
-
-
-
-
+        Args:
+            path_yaml: The path to the yaml config. If the model is built using several config files, always use
+                       the highest level .yaml file.
+            path_weights: The path to the model checkpoint.
+            categories: A dict with key (indices) and values (category names). Index 0 must be reserved for a
+                        dummy `BG` category. Note, that this convention is different from the builtin D2 framework,
+                        where models in the model zoo are trained with `BG` class having the highest index.
+            config_overwrite: Overwrite some hyperparameters defined by the yaml file with some new values. E.g.
+                              `["OUTPUT.FRCNN_NMS_THRESH=0.3","OUTPUT.RESULT_SCORE_THRESH=0.6"]`.
+            device: "cpu" or "cuda". If not specified will auto select depending on what is available
+            filter_categories: The model might return objects that are not supposed to be predicted and that should
+                               be filtered. Pass a list of category names that must not be returned
         """
         super().__init__(categories, filter_categories)
 
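Taken together, the documented arguments combine as in the following hedged sketch; the paths, category names, and test image are placeholders rather than real catalog entries:

```python
import numpy as np
from deepdoctection.extern.d2detect import D2FrcnnDetector

d2_predictor = D2FrcnnDetector(
    path_yaml="path/to/CASCADE_RCNN_R_50_FPN_GN.yaml",  # highest-level yaml config (placeholder)
    path_weights="path/to/d2_model.pkl",                # placeholder checkpoint path
    categories={1: "text", 2: "table"},                 # index 0 implicitly reserved for BG
    config_overwrite=["OUTPUT.FRCNN_NMS_THRESH=0.3", "OUTPUT.RESULT_SCORE_THRESH=0.6"],
    device="cpu",                                       # omit to auto-select
    filter_categories=["table"],                        # categories that must not be returned
)
bgr_image_np_array = np.zeros((1000, 700, 3), dtype=np.uint8)  # stand-in for a real page
detection_results = d2_predictor.predict(bgr_image_np_array)
```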
@@ -283,10 +301,13 @@ class D2FrcnnDetector(D2FrcnnDetectorMixin):
     @staticmethod
     def _set_model(config: CfgNode) -> GeneralizedRCNN:
         """
-        Build the
+        Build the model. It uses the available built-in tools of D2
 
-        :
-
+        Args:
+            config: Model config
+
+        Returns:
+            `GeneralizedRCNN` model
         """
         return build_model(config.clone()).eval()
 
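`_set_model` covers only the build step; loading the checkpoint happens elsewhere in the wrapper. For orientation, the usual Detectron2 pattern looks roughly like the sketch below; `DetectionCheckpointer` is an assumption here, since the diff only shows the `build_model` call:

```python
from detectron2.checkpoint import DetectionCheckpointer
from detectron2.config import get_cfg
from detectron2.modeling import build_model

cfg = get_cfg()
cfg.merge_from_file("path/to/config.yaml")   # placeholder path

model = build_model(cfg.clone()).eval()      # the step shown in _set_model
DetectionCheckpointer(model).load("path/to/weights.pkl")  # load weights into the model
```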
@@ -299,8 +320,11 @@ class D2FrcnnDetector(D2FrcnnDetectorMixin):
         """
         Prediction per image.
 
-        :
-
+        Args:
+            np_img: image as `np.array`
+
+        Returns:
+            A list of `DetectionResult`s
         """
         detection_results = d2_predict_image(
             np_img,
@@ -332,25 +356,28 @@ class D2FrcnnDetector(D2FrcnnDetectorMixin):
         device: Optional[Union[Literal["cpu", "cuda"], torch.device]] = None,
     ) -> GeneralizedRCNN:
         """
-        Get the wrapped model. Useful if one
+        Get the wrapped model. Useful, if one does not want to build the wrapper but only needs the instantiated model.
 
         Example:
-
-
+            ```python
             path_yaml = ModelCatalog.get_full_path_configs("dd/d2/item/CASCADE_RCNN_R_50_FPN_GN.yaml")
             weights_path = ModelDownloadManager.maybe_download_weights_and_configs("item/d2_model-800000-layout.pkl")
             model = D2FrcnnDetector.get_wrapped_model(path_yaml,weights_path,["OUTPUT.FRCNN_NMS_THRESH=0.3",
                                                      "OUTPUT.RESULT_SCORE_THRESH=0.6"],
                                                      "cpu")
             detect_result_list = d2_predict_image(np_img,model,InferenceResize(800,1333),0.3)
-
-
-
-
-
-
-
-
+            ```
+
+        Args:
+            path_yaml: The path to the `yaml` config. If the model is built using several config files, always use
+                       the highest level `.yaml` file.
+            path_weights: The path to the model checkpoint.
+            config_overwrite: Overwrite some hyperparameters defined by the yaml file with some new values. E.g.
+                              `["OUTPUT.FRCNN_NMS_THRESH=0.3","OUTPUT.RESULT_SCORE_THRESH=0.6"]`.
+            device: "cpu" or "cuda". If not specified will auto select depending on what is available
+
+        Returns:
+            `GeneralizedRCNN` model
         """
 
         device = get_torch_device(device)
@@ -379,14 +406,13 @@ class D2FrcnnTracingDetector(D2FrcnnDetectorMixin):
     be installed. The associated config setting only contains parameters that are involved in pre-and post-processing.
     Depending on running the model with CUDA or on a CPU, it will need the appropriate exported model.
 
-
-
-
-
-    on this topic, see <https://github.com/facebookresearch/detectron2/issues/978> .
+    Note:
+        There are no adjustment to the original implementation of Detectron2. Only one post-processing step is followed
+        by the standard D2 output that takes into account of the situation that detected objects are disjoint. For more
+        infos on this topic, see <https://github.com/facebookresearch/detectron2/issues/978>.
 
     Example:
-
+        ```python
         config_path = ModelCatalog.get_full_path_configs("dd/d2/item/CASCADE_RCNN_R_50_FPN_GN.yaml")
         weights_path = ModelDownloadManager.maybe_download_weights_and_configs("item/d2_model-800000-layout.pkl")
         categories = ModelCatalog.get_profile("item/d2_model-800000-layout.pkl").categories
@@ -394,7 +420,7 @@ class D2FrcnnTracingDetector(D2FrcnnDetectorMixin):
         d2_predictor = D2FrcnnDetector(config_path,weights_path,categories)
 
         detection_results = d2_predictor.predict(bgr_image_np_array)
-
+        ```
     """
 
     def __init__(
@@ -406,18 +432,19 @@ class D2FrcnnTracingDetector(D2FrcnnDetectorMixin):
         filter_categories: Optional[Sequence[TypeOrStr]] = None,
     ):
         """
-        Set up the
-
-        :
-
-
-
-
-
-
-
-
-
+        Set up the Torchscript predictor.
+
+        Args:
+            path_yaml: The path to the `yaml` config. If the model is built using several config files, always use
+                       the highest level `.yaml` file.
+            path_weights: The path to the model checkpoint.
+            categories: A dict with key (indices) and values (category names). Index 0 must be reserved for a
+                        dummy `BG` category. Note, that this convention is different from the builtin D2 framework,
+                        where models in the model zoo are trained with `BG` class having the highest index.
+            config_overwrite: Overwrite some hyperparameters defined by the yaml file with some new values. E.g.
+                              `["OUTPUT.FRCNN_NMS_THRESH=0.3","OUTPUT.RESULT_SCORE_THRESH=0.6"]`.
+            filter_categories: The model might return objects that are not supposed to be predicted and that should
+                               be filtered. Pass a list of category names that must not be returned
         """
 
         super().__init__(categories, filter_categories)
@@ -441,15 +468,20 @@ class D2FrcnnTracingDetector(D2FrcnnDetectorMixin):
         cfg = set_config_by_yaml(path_yaml)
         config_overwrite = config_overwrite if config_overwrite else []
         config_overwrite.extend([f"MODEL.WEIGHTS={os.fspath(path_weights)}"])
+        cfg.freeze(False)
         cfg.update_args(config_overwrite)
+        cfg.freeze()
         return cfg
 
     def predict(self, np_img: PixelValues) -> list[DetectionResult]:
         """
         Prediction per image.
 
-        :
-
+        Args:
+            np_img: image as `np.array`
+
+        Returns:
+            A list of `DetectionResult`s
         """
         detection_results = d2_jit_predict_image(
             np_img,
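The newly inserted `cfg.freeze(False)` / `cfg.freeze()` pair is the classic frozen-config idiom: the config is immutable by default and must be explicitly unfrozen before `update_args` may mutate it. A toy illustration of the pattern (a minimal stand-in class, not deepdoctection's actual config implementation):

```python
class FrozenConfig:
    def __init__(self, **kwargs):
        object.__setattr__(self, "_frozen", False)
        for key, value in kwargs.items():
            setattr(self, key, value)
        object.__setattr__(self, "_frozen", True)

    def freeze(self, freezed: bool = True) -> None:
        # bypass __setattr__ so the flag itself can always be toggled
        object.__setattr__(self, "_frozen", freezed)

    def __setattr__(self, key, value):
        if self._frozen:
            raise AttributeError(f"config is frozen, cannot set {key}")
        object.__setattr__(self, key, value)


cfg = FrozenConfig(NMS_THRESH=0.5)
cfg.freeze(False)       # unfreeze before mutating, as the diff now does
cfg.NMS_THRESH = 0.3
cfg.freeze()            # re-freeze so later code cannot silently change it
```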
@@ -478,10 +510,13 @@ class D2FrcnnTracingDetector(D2FrcnnDetectorMixin):
     @staticmethod
     def get_wrapped_model(path_weights: PathLikeOrStr) -> torch.jit.ScriptModule:
         """
-        Get the wrapped model. Useful if one do not want to build the wrapper but only needs the instantiated model.
+        Get the wrapped model. Useful, if one do not want to build the wrapper but only needs the instantiated model.
+
+        Args:
+            path_weights: The path to the model checkpoint. The model must be exported as Torchscript.
 
-        :
-
+        Returns:
+            `torch.jit.ScriptModule` model
         """
         with open(path_weights, "rb") as file:
             buffer = io.BytesIO(file.read())
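The final context lines read the Torchscript checkpoint into a buffer; such a buffer is typically materialized with `torch.jit.load`, which the diff cuts off before showing. A hedged sketch of that load step:

```python
import io
import torch

with open("path/to/scripted_model.pt", "rb") as file:   # placeholder path
    buffer = io.BytesIO(file.read())

# torch.jit.load accepts a file-like object; map_location pins the target device
model = torch.jit.load(buffer, map_location="cpu").eval()
```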
deepdoctection/extern/deskew.py
CHANGED
@@ -16,8 +16,9 @@
 # limitations under the License.
 
 """
-
+Jdeskew estimator and rotator: <https://github.com/phamquiluan/jdeskew>
 """
+
 from __future__ import annotations
 
 from lazy_imports import try_import
@@ -47,14 +48,19 @@ class Jdeskewer(ImageTransformer):
         """
         Rotation of the image according to the angle determined by the jdeskew estimator.
 
-
-
-
-
+        Example:
+            ```python
+            jdeskew_predictor = Jdeskewer()
+            detection_result = jdeskew_predictor.predict(np_image)
+            jdeskew_predictor.transform(np_image, DetectionResult(angle=5.0))
+            ```
+
+        Args:
+            np_img: image as `np.array`
+            specification: `DetectionResult` with angle value
 
-        :
-
-        :return: image rotated by the angle
+        Returns:
+            image rotated by the angle
         """
         if abs(specification.angle) > self.min_angle_rotation:  # type: ignore
             return viz_handler.rotate_image(np_img, specification.angle)  # type: ignore
@@ -64,8 +70,11 @@ class Jdeskewer(ImageTransformer):
         """
         Predict the angle of the image to deskew it.
 
-        :
-
+        Args:
+            np_img: image as `np.array`
+
+        Returns:
+            `DetectionResult` with angle value
         """
         return DetectionResult(angle=round(float(get_angle(np_img)), 4))
 
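A short sketch tying `predict` and `transform` together, mirroring the `min_angle_rotation` guard seen in `transform`; the threshold value and the blank test image are placeholders:

```python
import numpy as np
from deepdoctection.extern.deskew import Jdeskewer

jdeskewer = Jdeskewer()
np_image = np.zeros((400, 600, 3), dtype=np.uint8)  # stand-in for a real scanned page
detection_result = jdeskewer.predict(np_image)      # DetectionResult with an angle value
if abs(detection_result.angle) > 2.0:               # placeholder threshold; the class itself
    np_image = jdeskewer.transform(np_image, detection_result)  # consults min_angle_rotation
```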