deepdoctection 0.31__py3-none-any.whl → 0.32__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of deepdoctection might be problematic.
Files changed (91)
  1. deepdoctection/__init__.py +35 -28
  2. deepdoctection/analyzer/dd.py +30 -24
  3. deepdoctection/configs/conf_dd_one.yaml +34 -31
  4. deepdoctection/datapoint/annotation.py +2 -1
  5. deepdoctection/datapoint/box.py +2 -1
  6. deepdoctection/datapoint/image.py +13 -7
  7. deepdoctection/datapoint/view.py +95 -24
  8. deepdoctection/datasets/__init__.py +1 -4
  9. deepdoctection/datasets/adapter.py +5 -2
  10. deepdoctection/datasets/base.py +5 -3
  11. deepdoctection/datasets/info.py +2 -2
  12. deepdoctection/datasets/instances/doclaynet.py +3 -2
  13. deepdoctection/datasets/instances/fintabnet.py +2 -1
  14. deepdoctection/datasets/instances/funsd.py +2 -1
  15. deepdoctection/datasets/instances/iiitar13k.py +5 -2
  16. deepdoctection/datasets/instances/layouttest.py +2 -1
  17. deepdoctection/datasets/instances/publaynet.py +2 -2
  18. deepdoctection/datasets/instances/pubtables1m.py +6 -3
  19. deepdoctection/datasets/instances/pubtabnet.py +2 -1
  20. deepdoctection/datasets/instances/rvlcdip.py +2 -1
  21. deepdoctection/datasets/instances/xfund.py +2 -1
  22. deepdoctection/eval/__init__.py +1 -4
  23. deepdoctection/eval/cocometric.py +2 -1
  24. deepdoctection/eval/eval.py +17 -13
  25. deepdoctection/eval/tedsmetric.py +14 -11
  26. deepdoctection/eval/tp_eval_callback.py +9 -3
  27. deepdoctection/extern/__init__.py +2 -7
  28. deepdoctection/extern/d2detect.py +24 -32
  29. deepdoctection/extern/deskew.py +4 -2
  30. deepdoctection/extern/doctrocr.py +75 -81
  31. deepdoctection/extern/fastlang.py +4 -2
  32. deepdoctection/extern/hfdetr.py +22 -28
  33. deepdoctection/extern/hflayoutlm.py +335 -103
  34. deepdoctection/extern/hflm.py +225 -0
  35. deepdoctection/extern/model.py +56 -47
  36. deepdoctection/extern/pdftext.py +8 -4
  37. deepdoctection/extern/pt/__init__.py +1 -3
  38. deepdoctection/extern/pt/nms.py +6 -2
  39. deepdoctection/extern/pt/ptutils.py +27 -19
  40. deepdoctection/extern/texocr.py +4 -2
  41. deepdoctection/extern/tp/tfutils.py +43 -9
  42. deepdoctection/extern/tp/tpcompat.py +10 -7
  43. deepdoctection/extern/tp/tpfrcnn/__init__.py +20 -0
  44. deepdoctection/extern/tp/tpfrcnn/common.py +7 -3
  45. deepdoctection/extern/tp/tpfrcnn/config/__init__.py +20 -0
  46. deepdoctection/extern/tp/tpfrcnn/config/config.py +9 -6
  47. deepdoctection/extern/tp/tpfrcnn/modeling/__init__.py +20 -0
  48. deepdoctection/extern/tp/tpfrcnn/modeling/backbone.py +17 -7
  49. deepdoctection/extern/tp/tpfrcnn/modeling/generalized_rcnn.py +12 -6
  50. deepdoctection/extern/tp/tpfrcnn/modeling/model_box.py +9 -4
  51. deepdoctection/extern/tp/tpfrcnn/modeling/model_cascade.py +8 -5
  52. deepdoctection/extern/tp/tpfrcnn/modeling/model_fpn.py +16 -11
  53. deepdoctection/extern/tp/tpfrcnn/modeling/model_frcnn.py +17 -10
  54. deepdoctection/extern/tp/tpfrcnn/modeling/model_mrcnn.py +14 -8
  55. deepdoctection/extern/tp/tpfrcnn/modeling/model_rpn.py +15 -10
  56. deepdoctection/extern/tp/tpfrcnn/predict.py +9 -4
  57. deepdoctection/extern/tp/tpfrcnn/preproc.py +7 -3
  58. deepdoctection/extern/tp/tpfrcnn/utils/__init__.py +20 -0
  59. deepdoctection/extern/tp/tpfrcnn/utils/box_ops.py +10 -2
  60. deepdoctection/extern/tpdetect.py +5 -8
  61. deepdoctection/mapper/__init__.py +3 -8
  62. deepdoctection/mapper/d2struct.py +8 -6
  63. deepdoctection/mapper/hfstruct.py +6 -1
  64. deepdoctection/mapper/laylmstruct.py +163 -20
  65. deepdoctection/mapper/maputils.py +3 -1
  66. deepdoctection/mapper/misc.py +6 -3
  67. deepdoctection/mapper/tpstruct.py +2 -2
  68. deepdoctection/pipe/__init__.py +1 -1
  69. deepdoctection/pipe/common.py +11 -9
  70. deepdoctection/pipe/concurrency.py +2 -1
  71. deepdoctection/pipe/layout.py +3 -1
  72. deepdoctection/pipe/lm.py +32 -64
  73. deepdoctection/pipe/order.py +142 -35
  74. deepdoctection/pipe/refine.py +8 -14
  75. deepdoctection/pipe/{cell.py → sub_layout.py} +1 -1
  76. deepdoctection/train/__init__.py +6 -12
  77. deepdoctection/train/d2_frcnn_train.py +21 -16
  78. deepdoctection/train/hf_detr_train.py +18 -11
  79. deepdoctection/train/hf_layoutlm_train.py +118 -101
  80. deepdoctection/train/tp_frcnn_train.py +21 -19
  81. deepdoctection/utils/env_info.py +41 -117
  82. deepdoctection/utils/logger.py +1 -0
  83. deepdoctection/utils/mocks.py +93 -0
  84. deepdoctection/utils/settings.py +1 -0
  85. deepdoctection/utils/viz.py +4 -3
  86. {deepdoctection-0.31.dist-info → deepdoctection-0.32.dist-info}/METADATA +27 -18
  87. deepdoctection-0.32.dist-info/RECORD +146 -0
  88. deepdoctection-0.31.dist-info/RECORD +0 -144
  89. {deepdoctection-0.31.dist-info → deepdoctection-0.32.dist-info}/LICENSE +0 -0
  90. {deepdoctection-0.31.dist-info → deepdoctection-0.32.dist-info}/WHEEL +0 -0
  91. {deepdoctection-0.31.dist-info → deepdoctection-0.32.dist-info}/top_level.txt +0 -0

deepdoctection/datasets/base.py
@@ -18,6 +18,8 @@
 """
 Module for the base class of datasets.
 """
+from __future__ import annotations
+
 import json
 import os
 import pprint
@@ -33,7 +35,7 @@ from ..dataflow import CacheData, ConcatData, CustomDataFromList, DataFlow
 from ..datapoint.image import Image
 from ..utils.detection_types import Pathlike
 from ..utils.logger import LoggingRecord, logger
-from ..utils.settings import ObjectTypes, TypeOrStr, get_type
+from ..utils.settings import DatasetType, ObjectTypes, TypeOrStr, get_type
 from .dataflow_builder import DataFlowBaseBuilder
 from .info import DatasetCategories, DatasetInfo, get_merged_categories

@@ -423,7 +425,7 @@ class CustomDataset(DatasetBase):
         """

         self.name = name
-        self.type = get_type(dataset_type)
+        self.type: DatasetType = get_type(dataset_type)  # type: ignore
         self.location = location
         self.init_categories = init_categories
         if init_sub_categories is None:
@@ -449,7 +451,7 @@ class CustomDataset(DatasetBase):
         return self.dataflow_builder

     @staticmethod
-    def from_dataset_card(file_path: str, dataflow_builder: Type[DataFlowBaseBuilder]) -> "CustomDataset":
+    def from_dataset_card(file_path: str, dataflow_builder: Type[DataFlowBaseBuilder]) -> CustomDataset:
         """
         This static method creates a CustomDataset instance from a dataset card.

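A note on the pattern above, which repeats through most hunks below: `from __future__ import annotations` (PEP 563) turns every annotation into a string at runtime, so forward references such as the `CustomDataset` return type no longer need quoting. A minimal stand-alone sketch of the effect (`from_card` is a toy stand-in, not the library's `from_dataset_card`):

    from __future__ import annotations


    class CustomDataset:
        @staticmethod
        def from_card() -> CustomDataset:  # no quotes needed; evaluated lazily
            return CustomDataset()


    print(CustomDataset.from_card())
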

deepdoctection/datasets/info.py
@@ -24,7 +24,7 @@ from dataclasses import dataclass, field
 from itertools import chain
 from typing import Any, Dict, List, Literal, Mapping, Optional, Sequence, Set, Union, no_type_check, overload

-from ..utils.settings import DefaultType, ObjectTypes, TypeOrStr, get_type
+from ..utils.settings import DatasetType, ObjectTypes, TypeOrStr, get_type
 from ..utils.utils import call_only_once

 __all__ = ["DatasetInfo", "DatasetCategories", "get_merged_categories"]
@@ -89,7 +89,7 @@ class DatasetInfo:
     license: str = field(default="")
     url: Union[str, Sequence[str]] = field(default="")
     splits: Mapping[str, str] = field(default_factory=dict)
-    type: ObjectTypes = field(default=DefaultType.default_type)
+    type: DatasetType = field(default=DatasetType.default)

     def get_split(self, key: str) -> str:
         """

deepdoctection/datasets/instances/doclaynet.py
@@ -25,6 +25,7 @@ Module for DocLayNet dataset. Place the dataset as follows
 ├── PNG
 │ ├── 0a0d43e301facee9e99cc33b9b16e732dd207135f4027e75f6aea2bf117535a2.png
 """
+from __future__ import annotations

 import os
 from typing import Mapping, Sequence, Union
@@ -109,7 +110,7 @@ class DocLayNet(DatasetBase):
     def _categories(self) -> DatasetCategories:
         return DatasetCategories(init_categories=_INIT_CATEGORIES, init_sub_categories=_SUB_CATEGORIES)

-    def _builder(self) -> "DocLayNetBuilder":
+    def _builder(self) -> DocLayNetBuilder:
         return DocLayNetBuilder(location=_LOCATION, annotation_files=_ANNOTATION_FILES)


@@ -209,7 +210,7 @@ class DocLayNetSeq(DatasetBase):
     def _categories(self) -> DatasetCategories:
         return DatasetCategories(init_categories=_INIT_CATEGORIES_SEQ)

-    def _builder(self) -> "DocLayNetSeqBuilder":
+    def _builder(self) -> DocLayNetSeqBuilder:
         return DocLayNetSeqBuilder(location=_LOCATION, annotation_files=_ANNOTATION_FILES)


deepdoctection/datasets/instances/fintabnet.py
@@ -30,6 +30,7 @@ Module for Fintabnet dataset. Place the dataset as follows
 ├── FinTabNet_1.0.0_table_train.jsonl
 ├── FinTabNet_1.0.0_table_val.jsonl
 """
+from __future__ import annotations

 from pathlib import Path
 from typing import List, Mapping, Sequence, Union
@@ -133,7 +134,7 @@ class Fintabnet(_BuiltInDataset):
     def _categories(self) -> DatasetCategories:
         return DatasetCategories(init_categories=_INIT_CATEGORIES, init_sub_categories=_SUB_CATEGORIES)

-    def _builder(self) -> "FintabnetBuilder":
+    def _builder(self) -> FintabnetBuilder:
         return FintabnetBuilder(location=_LOCATION, annotation_files=_ANNOTATION_FILES)


deepdoctection/datasets/instances/funsd.py
@@ -32,6 +32,7 @@ Module for Funsd dataset. Install the dataset following the folder structure
 │ ├── images
 │ │ ├── ...
 """
+from __future__ import annotations

 import os
 from typing import Dict, List, Mapping, Union
@@ -120,7 +121,7 @@ class Funsd(_BuiltInDataset):
     def _categories(self) -> DatasetCategories:
         return DatasetCategories(init_categories=_INIT_CATEGORIES, init_sub_categories=_SUB_CATEGORIES)

-    def _builder(self) -> "FunsdBuilder":
+    def _builder(self) -> FunsdBuilder:
         return FunsdBuilder(location=_LOCATION, annotation_files=_ANNOTATION_FILES)


deepdoctection/datasets/instances/iiitar13k.py
@@ -35,10 +35,13 @@ Module for IIITar13K dataset. Install the dataset following the folder structure
 │ ├── ...

 """
+from __future__ import annotations

 import os
 from typing import Mapping, Union

+from lazy_imports import try_import
+
 from ...dataflow import DataFlow, MapData, SerializerFiles
 from ...datasets.info import DatasetInfo
 from ...mapper.maputils import curry
@@ -53,7 +56,7 @@ from ..dataflow_builder import DataFlowBaseBuilder
 from ..info import DatasetCategories
 from ..registry import dataset_registry

-if lxml_available():
+with try_import() as import_guard:
     from lxml import etree

 _NAME = "iiitar13k"
@@ -99,7 +102,7 @@ class IIITar13K(_BuiltInDataset):
     def _categories(self) -> DatasetCategories:
         return DatasetCategories(init_categories=_INIT_CATEGORIES)

-    def _builder(self) -> "IIITar13KBuilder":
+    def _builder(self) -> IIITar13KBuilder:
         return IIITar13KBuilder(location=_LOCATION, annotation_files=_ANNOTATION_FILES)

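The `with try_import() as import_guard:` block above is the recurring pattern of this release: the `if lxml_available():` guards from `file_utils` give way to the `try_import` context manager of the `lazy-imports` package, which swallows the `ImportError` and records whether the import succeeded (see `is_successful()` in the eval hunks further down). A self-contained sketch of the idiom:

    from lazy_imports import try_import

    with try_import() as import_guard:
        from lxml import etree  # bound only if lxml is installed

    if import_guard.is_successful():
        print(etree.fromstring("<a/>").tag)  # "a"
    else:
        print("lxml missing; features that need it stay disabled")
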

deepdoctection/datasets/instances/layouttest.py
@@ -24,6 +24,7 @@ Module for Testlayout dataset. Install the dataset following the folder structure
 ├── test
 │ ├── xrf_layout_test.jsonl
 """
+from __future__ import annotations

 from typing import Mapping, Union

@@ -77,7 +78,7 @@ class LayoutTest(_BuiltInDataset):
     def _categories(self) -> DatasetCategories:
         return DatasetCategories(init_categories=_INIT_CATEGORIES)

-    def _builder(self) -> "LayoutTestBuilder":
+    def _builder(self) -> LayoutTestBuilder:
         return LayoutTestBuilder(location=_LOCATION, annotation_files=_ANNOTATION_FILES)


deepdoctection/datasets/instances/publaynet.py
@@ -28,7 +28,7 @@ Module for Publaynet dataset. Place the dataset as follows
 ├── train.json
 ├── val.json
 """
-
+from __future__ import annotations

 from typing import Mapping, Union

@@ -84,7 +84,7 @@ class Publaynet(_BuiltInDataset):
     def _categories(self) -> DatasetCategories:
         return DatasetCategories(init_categories=_INIT_CATEGORIES)

-    def _builder(self) -> "PublaynetBuilder":
+    def _builder(self) -> PublaynetBuilder:
         return PublaynetBuilder(location=_LOCATION, annotation_files=_ANNOTATION_FILES)


deepdoctection/datasets/instances/pubtables1m.py
@@ -37,10 +37,13 @@ Module for PubTables1M-Detection-PASCAL-VOC dataset. Install the dataset following the folder structure
 ├── PubTables-1M-Structure_Annotations_Test
 ├── PubTables-1M-Structure_Images_Test
 """
+from __future__ import annotations

 import os
 from typing import Mapping, Union

+from lazy_imports import try_import
+
 from ...dataflow import DataFlow, MapData, SerializerFiles
 from ...datasets.info import DatasetInfo
 from ...mapper.cats import filter_cat
@@ -56,7 +59,7 @@ from ..dataflow_builder import DataFlowBaseBuilder
 from ..info import DatasetCategories
 from ..registry import dataset_registry

-if lxml_available():
+with try_import() as import_guard:
     from lxml import etree

 _NAME = "pubtables1m_det"
@@ -102,7 +105,7 @@ class Pubtables1MDet(_BuiltInDataset):
     def _categories(self) -> DatasetCategories:
         return DatasetCategories(init_categories=_INIT_CATEGORIES_DET)

-    def _builder(self) -> "Pubtables1MBuilder":
+    def _builder(self) -> Pubtables1MBuilder:
         return Pubtables1MBuilder(location=_LOCATION, annotation_files=_ANNOTATION_FILES)

@@ -225,7 +228,7 @@ class Pubtables1MStruct(_BuiltInDataset):
     def _categories(self) -> DatasetCategories:
         return DatasetCategories(init_categories=_INIT_CATEGORIES_STRUCT)

-    def _builder(self) -> "Pubtables1MBuilderStruct":
+    def _builder(self) -> Pubtables1MBuilderStruct:
         return Pubtables1MBuilderStruct(location=_LOCATION, annotation_files=_ANNOTATION_FILES_STRUCT)


deepdoctection/datasets/instances/pubtabnet.py
@@ -27,6 +27,7 @@ Module for Pubtabnet dataset. Place the dataset as follows
 │ ├── PMC3.png
 ├── PubTabNet_2.0.0.jsonl
 """
+from __future__ import annotations

 from typing import Dict, List, Mapping, Union

@@ -119,7 +120,7 @@ class Pubtabnet(_BuiltInDataset):
     def _categories(self) -> DatasetCategories:
         return DatasetCategories(init_categories=_INIT_CATEGORIES, init_sub_categories=_SUB_CATEGORIES)

-    def _builder(self) -> "PubtabnetBuilder":
+    def _builder(self) -> PubtabnetBuilder:
         return PubtabnetBuilder(location=_LOCATION, annotation_files=_ANNOTATION_FILES)


deepdoctection/datasets/instances/rvlcdip.py
@@ -29,6 +29,7 @@ Module for Publaynet dataset. Place the dataset as follows
 │ ├── train.txt
 │ ├── val.txt
 """
+from __future__ import annotations

 import os
 from typing import Mapping, Union
@@ -102,7 +103,7 @@ class Rvlcdip(_BuiltInDataset):
     def _categories(self) -> DatasetCategories:
         return DatasetCategories(init_categories=_INIT_CATEGORIES)

-    def _builder(self) -> "RvlcdipBuilder":
+    def _builder(self) -> RvlcdipBuilder:
         return RvlcdipBuilder(location=_LOCATION, annotation_files=_ANNOTATION_FILES)


deepdoctection/datasets/instances/xfund.py
@@ -27,6 +27,7 @@ Module for XFUND dataset. Install the dataset following the folder structure
 │ ├── de_val_0.jpg
 ├── es_train
 """
+from __future__ import annotations

 import json
 import os
@@ -108,7 +109,7 @@ class Xfund(_BuiltInDataset):
     def _categories(self) -> DatasetCategories:
         return DatasetCategories(init_categories=_INIT_CATEGORIES, init_sub_categories=_SUB_CATEGORIES)

-    def _builder(self) -> "XfundBuilder":
+    def _builder(self) -> XfundBuilder:
         return XfundBuilder(location=_LOCATION, annotation_files=_ANNOTATION_FILES)


deepdoctection/eval/__init__.py
@@ -20,12 +20,9 @@ Init file for eval package. Contains metrics (customized for special tasks), eva
 for training.
 """

-from ..utils.file_utils import apted_available
 from .accmetric import *
 from .base import *
 from .cocometric import *
 from .eval import *
 from .registry import *
-
-if apted_available():
-    from .tedsmetric import *
+from .tedsmetric import *

deepdoctection/eval/cocometric.py
@@ -23,6 +23,7 @@ from copy import copy
 from typing import Dict, List, Optional, Tuple, Union

 import numpy as np
+from lazy_imports import try_import

 from ..dataflow import DataFlow
 from ..datasets.info import DatasetCategories
@@ -33,7 +34,7 @@ from ..utils.file_utils import Requirement, cocotools_available, get_cocotools_requirement
 from .base import MetricBase
 from .registry import metric_registry

-if cocotools_available():
+with try_import() as cc_import_guard:
     from pycocotools.coco import COCO
     from pycocotools.cocoeval import COCOeval


deepdoctection/eval/eval.py
@@ -19,36 +19,35 @@
 """
 Module for `Evaluator`
 """
-
-__all__ = ["Evaluator"]
+from __future__ import annotations

 from copy import deepcopy
-from typing import Any, Dict, List, Literal, Mapping, Optional, Type, Union, overload
+from typing import Any, Dict, Generator, List, Literal, Mapping, Optional, Type, Union, overload

 import numpy as np
+from lazy_imports import try_import

 from ..dataflow import CacheData, DataFlow, DataFromList, MapData
 from ..datapoint.image import Image
 from ..datasets.base import DatasetBase
 from ..mapper.cats import filter_cat, remove_cats
+from ..mapper.d2struct import to_wandb_image
 from ..mapper.misc import maybe_load_image, maybe_remove_image, maybe_remove_image_from_category
 from ..pipe.base import LanguageModelPipelineComponent, PredictorPipelineComponent
 from ..pipe.common import PageParsingService
 from ..pipe.concurrency import MultiThreadPipelineComponent
 from ..pipe.doctectionpipe import DoctectionPipe
 from ..utils.detection_types import ImageType
-from ..utils.file_utils import detectron2_available, wandb_available
 from ..utils.logger import LoggingRecord, logger
 from ..utils.settings import DatasetType, LayoutType, TypeOrStr, get_type
 from ..utils.viz import interactive_imshow
 from .base import MetricBase

-if wandb_available():
+with try_import() as wb_import_guard:
     import wandb  # pylint:disable=W0611
     from wandb import Artifact, Table

-if wandb_available() and detectron2_available():
-    from ..mapper.d2struct import to_wandb_image
+__all__ = ["Evaluator"]


 class Evaluator:
@@ -94,7 +93,7 @@ class Evaluator:
         component_or_pipeline: Union[PredictorPipelineComponent, LanguageModelPipelineComponent, DoctectionPipe],
         metric: Union[Type[MetricBase], MetricBase],
         num_threads: int = 2,
-        run: Optional["wandb.sdk.wandb_run.Run"] = None,
+        run: Optional[wandb.sdk.wandb_run.Run] = None,
     ) -> None:
         """
         Evaluating a pipeline component on a dataset with a given metric.
@@ -275,7 +274,7 @@ class Evaluator:

         return df_pr

-    def compare(self, interactive: bool = False, **kwargs: Union[str, int]) -> Optional[ImageType]:
+    def compare(self, interactive: bool = False, **kwargs: Union[str, int]) -> Generator[ImageType, None, None]:
         """
         Visualize ground truth and prediction datapoint. Given a dataflow config it will run predictions per sample
         and concat the prediction image (with predicted bounding boxes) with ground truth image.
@@ -292,6 +291,8 @@ class Evaluator:
         show_layouts = kwargs.pop("show_layouts", True)
         show_table_structure = kwargs.pop("show_table_structure", True)
         show_words = kwargs.pop("show_words", False)
+        show_token_class = kwargs.pop("show_token_class", True)
+        ignore_default_token_class = kwargs.pop("ignore_default_token_class", False)

         df_gt = self.dataset.dataflow.build(**kwargs)
         df_pr = self.dataset.dataflow.build(**kwargs)
@@ -321,18 +322,21 @@
                 show_layouts=show_layouts,
                 show_table_structure=show_table_structure,
                 show_words=show_words,
+                show_token_class=show_token_class,
+                ignore_default_token_class=ignore_default_token_class,
             ), dp_pred.viz(
                 show_tables=show_tables,
                 show_layouts=show_layouts,
                 show_table_structure=show_table_structure,
                 show_words=show_words,
+                show_token_class=show_token_class,
+                ignore_default_token_class=ignore_default_token_class,
             )
             img_concat = np.concatenate((img_gt, img_pred), axis=1)
             if interactive:
                 interactive_imshow(img_concat)
             else:
-                return img_concat
-            return None
+                yield img_concat


 class WandbTableAgent:
@@ -350,7 +354,7 @@ class WandbTableAgent:

     def __init__(
         self,
-        wandb_run: "wandb.sdk.wandb_run.Run",
+        wandb_run: wandb.sdk.wandb_run.Run,
         dataset_name: str,
         num_samples: int,
         categories: Mapping[str, TypeOrStr],
@@ -409,7 +413,7 @@ class WandbTableAgent:
         self._table_rows = []
         self._counter = 0

-    def _build_table(self) -> "Table":
+    def _build_table(self) -> Table:
         """
         Builds wandb.Table object for logging evaluation

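Since `compare` is now a generator, callers iterate over the concatenated ground-truth/prediction images instead of receiving a single array, and nothing is computed until iteration starts. A hedged usage sketch; `max_datapoints` is assumed to be a valid `dataflow.build` argument, as elsewhere in deepdoctection:

    # evaluator: an Evaluator built from a dataset, a pipeline component and a metric
    for img_concat in evaluator.compare(interactive=False, max_datapoints=3, show_words=True):
        # ground truth (left) and prediction (right), concatenated along axis 1
        print(img_concat.shape)
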

deepdoctection/eval/tedsmetric.py
@@ -20,28 +20,31 @@ import statistics
 from collections import defaultdict, deque
 from typing import Any, List, Optional, Tuple

+from lazy_imports import try_import
+
 from ..dataflow import DataFlow, DataFromList, MapData, MultiThreadMapData
 from ..datapoint.view import Page
 from ..datasets.base import DatasetCategories
 from ..utils.detection_types import JsonDict
-from ..utils.file_utils import (
-    Requirement,
-    apted_available,
-    distance_available,
-    get_apted_requirement,
-    get_distance_requirement,
-    get_lxml_requirement,
-    lxml_available,
-)
+from ..utils.file_utils import Requirement, get_apted_requirement, get_distance_requirement, get_lxml_requirement
 from ..utils.logger import LoggingRecord, logger
 from ..utils.settings import LayoutType
 from .base import MetricBase
 from .registry import metric_registry

-if distance_available() and lxml_available() and apted_available():
-    import distance  # type: ignore
+with try_import() as ap_import_guard:
     from apted import APTED, Config  # type: ignore
     from apted.helpers import Tree  # type: ignore
+
+
+if not ap_import_guard.is_successful():
+    from ..utils.mocks import Config, Tree
+
+
+with try_import() as ds_import_guard:
+    import distance  # type: ignore
+
+with try_import() as lx_import_guard:
     from lxml import etree

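The metric wraps tree edit distance over table structures via the `apted` package, and `Config`/`Tree` now fall back to stubs from the new `utils/mocks.py` when `apted` is absent. For orientation, a small stand-alone APTED example in the bracket notation its helpers parse (independent of deepdoctection; assumes the `apted` PyPI package):

    from apted import APTED, Config
    from apted.helpers import Tree

    tree_a = Tree.from_text("{table{row{cell}{cell}}}")
    tree_b = Tree.from_text("{table{row{cell}}}")
    # number of node edits needed to turn tree_a into tree_b
    print(APTED(tree_a, tree_b, Config()).compute_edit_distance())
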

deepdoctection/eval/tp_eval_callback.py
@@ -19,13 +19,16 @@
 Module for EvalCallback in Tensorpack
 """

+from __future__ import annotations
+
 from itertools import count
 from typing import Mapping, Optional, Sequence, Type, Union

+from lazy_imports import try_import
+
 from ..datasets import DatasetBase
 from ..extern.tpdetect import TPFrcnnDetector
 from ..pipe.base import PredictorPipelineComponent
-from ..utils.file_utils import tensorpack_available
 from ..utils.logger import LoggingRecord, logger
 from ..utils.metacfg import AttrDict
 from ..utils.settings import ObjectTypes
@@ -33,12 +36,15 @@ from .base import MetricBase
 from .eval import Evaluator

 # pylint: disable=import-error
-if tensorpack_available():
+with try_import() as import_guard:
     from tensorpack.callbacks import Callback
     from tensorpack.predict import OnlinePredictor
     from tensorpack.utils.gpu import get_num_gpu
 # pylint: enable=import-error

+if not import_guard.is_successful():
+    from ..utils.mocks import Callback
+

 # The following class is modified from
 # https://github.com/tensorpack/tensorpack/blob/master/examples/FasterRCNN/eval.py
@@ -53,7 +59,7 @@ class EvalCallback(Callback):  # pylint: disable=R0903

     _chief_only = False

-    def __init__(
+    def __init__(  # pylint: disable=W0231
         self,
         dataset: DatasetBase,
         category_names: Optional[Union[ObjectTypes, Sequence[ObjectTypes]]],
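Because `EvalCallback` subclasses `Callback` at module level, some `Callback` symbol must exist even without tensorpack installed; that is what the new `utils/mocks.py` supplies. A minimal sketch of the fallback idiom (the stub here is illustrative, not the literal module contents):

    from lazy_imports import try_import

    with try_import() as import_guard:
        from tensorpack.callbacks import Callback  # real base class when available

    if not import_guard.is_successful():
        class Callback:  # stub base class so definitions below still import cleanly
            pass


    class EvalCallback(Callback):
        _chief_only = False
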

deepdoctection/extern/__init__.py
@@ -19,8 +19,8 @@
 Wrappers for models of external libraries as well as implementation of the Cascade-RCNN model of Tensorpack.
 """

-from ..utils.file_utils import detectron2_available, tensorpack_available
 from .base import *
+from .d2detect import *
 from .deskew import *
 from .doctrocr import *
 from .fastlang import *
@@ -30,9 +30,4 @@ from .model import *
 from .pdftext import *
 from .tessocr import *
 from .texocr import *  # type: ignore
-
-if tensorpack_available():
-    from .tpdetect import *
-
-if detectron2_available():
-    from .d2detect import *
+from .tpdetect import *