custom-layoutparser 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (36) hide show
  1. custom_layoutparser-0.1.0.dist-info/METADATA +5 -0
  2. custom_layoutparser-0.1.0.dist-info/RECORD +36 -0
  3. custom_layoutparser-0.1.0.dist-info/WHEEL +5 -0
  4. custom_layoutparser-0.1.0.dist-info/top_level.txt +1 -0
  5. layoutparser/__init__.py +89 -0
  6. layoutparser/elements/__init__.py +25 -0
  7. layoutparser/elements/base.py +275 -0
  8. layoutparser/elements/errors.py +26 -0
  9. layoutparser/elements/layout.py +348 -0
  10. layoutparser/elements/layout_elements.py +1352 -0
  11. layoutparser/elements/utils.py +82 -0
  12. layoutparser/file_utils.py +235 -0
  13. layoutparser/io/__init__.py +2 -0
  14. layoutparser/io/basic.py +148 -0
  15. layoutparser/io/pdf.py +225 -0
  16. layoutparser/models/__init__.py +18 -0
  17. layoutparser/models/auto_layoutmodel.py +70 -0
  18. layoutparser/models/base_catalog.py +34 -0
  19. layoutparser/models/base_layoutmodel.py +88 -0
  20. layoutparser/models/detectron2/__init__.py +18 -0
  21. layoutparser/models/detectron2/catalog.py +142 -0
  22. layoutparser/models/detectron2/layoutmodel.py +168 -0
  23. layoutparser/models/effdet/__init__.py +16 -0
  24. layoutparser/models/effdet/catalog.py +88 -0
  25. layoutparser/models/effdet/layoutmodel.py +256 -0
  26. layoutparser/models/model_config.py +133 -0
  27. layoutparser/models/paddledetection/__init__.py +17 -0
  28. layoutparser/models/paddledetection/catalog.py +214 -0
  29. layoutparser/models/paddledetection/layoutmodel.py +297 -0
  30. layoutparser/ocr/__init__.py +16 -0
  31. layoutparser/ocr/base.py +41 -0
  32. layoutparser/ocr/gcv_agent.py +288 -0
  33. layoutparser/ocr/tesseract_agent.py +193 -0
  34. layoutparser/tools/__init__.py +5 -0
  35. layoutparser/tools/shape_operations.py +167 -0
  36. layoutparser/visualization.py +571 -0
@@ -0,0 +1,18 @@
1
+ # Copyright 2021 The Layout Parser team. All rights reserved.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ from .detectron2.layoutmodel import Detectron2LayoutModel
16
+ from .paddledetection.layoutmodel import PaddleDetectionLayoutModel
17
+ from .effdet.layoutmodel import EfficientDetLayoutModel
18
+ from .auto_layoutmodel import AutoLayoutModel
@@ -0,0 +1,70 @@
1
+ # Copyright 2021 The Layout Parser team. All rights reserved.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ from typing import Optional, Dict, Union, List
16
+ from .detectron2.layoutmodel import Detectron2LayoutModel
17
+ from .paddledetection.layoutmodel import PaddleDetectionLayoutModel
18
+ from .effdet.layoutmodel import EfficientDetLayoutModel
19
+ from .model_config import (
20
+ is_lp_layout_model_config_any_format,
21
+ )
22
+
23
# Registry of the layout model backends, keyed by each backend's
# DETECTOR_NAME. Insertion order is the order in which backends are tried.
ALL_AVAILABLE_BACKENDS = {
    backend.DETECTOR_NAME: backend
    for backend in (
        Detectron2LayoutModel,
        PaddleDetectionLayoutModel,
        EfficientDetLayoutModel,
    )
}
28
+
29
+
30
def AutoLayoutModel(
    config_path: str,
    model_path: Optional[str] = None,
    label_map: Optional[Dict] = None,
    device: Optional[str] = None,
    extra_config: Optional[Union[Dict, List]] = None,
) -> "BaseLayoutModel":
    """Create a layout model, picking the backend named in ``config_path``.

    The backend is the first entry of ``ALL_AVAILABLE_BACKENDS`` whose
    detector name occurs as a substring of ``config_path``.

    Args:
        config_path (:obj:`str`):
            The path to the configuration file.
        model_path (:obj:`str`, None):
            The path to the saved weights of the model.
            If set, overwrite the weights in the configuration file.
            Defaults to `None`.
        label_map (:obj:`dict`, optional):
            The map from the model prediction (ids) to real
            word labels (strings). If the config is from one of the supported
            datasets, Layout Parser will automatically initialize the label_map.
            Defaults to `None`.
        device(:obj:`str`, optional):
            Whether to use cuda or cpu devices. If not set, LayoutParser will
            automatically determine the device to initialize the models on.
        extra_config (:obj:`dict` or :obj:`list`, optional):
            Extra configuration used for initializing the layout model.

    Returns:
        BaseLayoutModel: the created layout model instance.

    Raises:
        ValueError: If ``config_path`` is not a LayoutParser model config
            path, or if it does not name any of the available backends.
    """
    if not is_lp_layout_model_config_any_format(config_path):
        raise ValueError(f"Invalid model config_path {config_path}")

    for backend_name, backend_cls in ALL_AVAILABLE_BACKENDS.items():
        if backend_name in config_path:
            return backend_cls(
                config_path,
                model_path=model_path,
                label_map=label_map,
                extra_config=extra_config,
                device=device,
            )

    # Falling off the loop used to return None silently, which only surfaced
    # later as an AttributeError on the caller's side. Fail here instead.
    raise ValueError(
        f"Invalid model config_path {config_path}: it does not name any of "
        f"the available backends ({', '.join(ALL_AVAILABLE_BACKENDS)})"
    )
@@ -0,0 +1,34 @@
1
+ # Copyright 2021 The Layout Parser team. All rights reserved.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ from iopath.common.file_io import HTTPURLHandler
16
+ from iopath.common.file_io import PathManager as PathManagerBase
17
+
18
+ # A trick learned from https://github.com/facebookresearch/detectron2/blob/65faeb4779e4c142484deeece18dc958c5c9ad18/detectron2/utils/file_io.py#L3
19
+
20
+
21
class DropboxHandler(HTTPURLHandler):
    """HTTP URL handler covering download and file checks for Dropbox links."""

    def _get_supported_prefixes(self):
        """Return the list of URL prefixes this handler accepts."""
        dropbox_prefix = "https://www.dropbox.com"
        return [dropbox_prefix]

    def _isfile(self, path):
        """Return True when ``path`` is a key of ``self.cache_map``."""
        already_cached = path in self.cache_map
        return already_cached


# Package-wide path manager; the Dropbox handler is registered on creation.
PathManager = PathManagerBase()
PathManager.register_handler(DropboxHandler())
@@ -0,0 +1,88 @@
1
+ # Copyright 2021 The Layout Parser team. All rights reserved.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ from typing import Optional, Tuple, Union, Dict
16
+ from abc import ABC, abstractmethod
17
+
18
+ from .model_config import LayoutModelConfig, add_identifier_for_config, layout_model_config_parser, is_lp_layout_model_config_any_format
19
+ from ..file_utils import requires_backends
20
+
21
class BaseLayoutModel(ABC):
    """Abstract base class for the layout detection model backends.

    Subclasses provide ``DEPENDENCIES``, ``DETECTOR_NAME`` and
    ``MODEL_CATALOG`` together with the ``detect`` and ``image_loader``
    methods.
    """

    def __new__(cls, *args, **kwargs):
        # Check the backend dependencies before the instance is created.
        requires_backends(cls, cls.DEPENDENCIES)
        return super().__new__(cls)

    # TODO: Build a metaclass for lazy module loader
    @property
    @abstractmethod
    def DEPENDENCIES(self):
        """DEPENDENCIES lists all necessary dependencies for the class."""

    @property
    @abstractmethod
    def DETECTOR_NAME(self):
        """Name of the detector; passed to ``layout_model_config_parser``."""

    @property
    @abstractmethod
    def MODEL_CATALOG(self) -> Dict[str, Dict[str, str]]:
        """Mapping of dataset name to a mapping keyed by model name."""

    @abstractmethod
    def detect(self, image: Union["np.ndarray", "Image.Image"]):
        """Run detection on ``image``; implemented by each backend."""

    @abstractmethod
    def image_loader(self, image: Union["np.ndarray", "Image.Image"]):
        """It will process the input images appropriately to the target format."""

    def _parse_config(self, config_path: str, identifier: str) -> Union[LayoutModelConfig, str]:
        """Parse a LayoutParser config path, or pass any other path through.

        Args:
            config_path: The path to inspect.
            identifier: Forwarded to ``add_identifier_for_config``; this class
                passes ``"config"`` or ``"weight"``.

        Returns:
            The result of ``layout_model_config_parser`` for a LayoutParser
            path, otherwise ``config_path`` unchanged.

        Raises:
            ValueError: If the path is in LayoutParser format but contains no
                dataset name from ``MODEL_CATALOG``.
        """
        if not is_lp_layout_model_config_any_format(config_path):
            return config_path

        config_path = add_identifier_for_config(config_path, identifier)
        for dataset_name, dataset_models in self.MODEL_CATALOG.items():
            if dataset_name not in config_path:
                continue
            # Use the first model_name for the dataset as the default_model_arch
            default_model_arch = list(dataset_models.keys())[0]
            return layout_model_config_parser(config_path, self.DETECTOR_NAME, default_model_arch)

        raise ValueError(f"The config {config_path} is not a valid config for {self.__class__}, "
                         f"possibly because there aren't models trained for the specified dataset.")

    def config_parser(self, config_path: str, model_path: Optional[str], allow_empty_path=False) -> Tuple[str, str]:
        """Resolve a config path and a weight path into a pair of strings.

        Args:
            config_path: Path of the model configuration.
            model_path: Path of the model weights, or ``None``.
            allow_empty_path: When true, a plain (non-LayoutParser) config
                path may come without a model path; the pair is then returned
                with ``model_path`` still ``None``.

        Returns:
            The ``(config_path, model_path)`` pair.

        Raises:
            ValueError: If the config path is a plain string, ``model_path``
                is ``None`` and ``allow_empty_path`` is false.
        """
        parsed_config = self._parse_config(config_path, "config")
        weights_missing = model_path is None

        if weights_missing and isinstance(parsed_config, str):
            if allow_empty_path:
                return parsed_config, model_path
            raise ValueError(
                f"Invalid config and model path pairs ({(parsed_config, model_path)}):"
                f"When config_path is a regular URL, the model_path should not be empty"
            )

        if weights_missing and isinstance(parsed_config, LayoutModelConfig):
            # No explicit weights: take the counterpart of the parsed config.
            parsed_weights = parsed_config.dual()
        else:
            parsed_weights = self._parse_config(model_path, "weight")

        if not isinstance(parsed_config, str):
            parsed_config = parsed_config.full
        if not isinstance(parsed_weights, str):
            parsed_weights = parsed_weights.full
        return parsed_config, parsed_weights
@@ -0,0 +1,18 @@
1
+ # Copyright 2021 The Layout Parser team. All rights reserved.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ from . import catalog as _UNUSED
16
+ # A trick learned from
17
+ # https://github.com/facebookresearch/detectron2/blob/62cf3a2b6840734d2717abdf96e2dd57ed6612a6/detectron2/checkpoint/__init__.py#L6
18
+ from .layoutmodel import Detectron2LayoutModel
@@ -0,0 +1,142 @@
1
+ # Copyright 2021 The Layout Parser team. All rights reserved.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ from iopath.common.file_io import PathHandler
16
+
17
+ from ..base_catalog import PathManager
18
+
19
# Weight files of the Detectron2 model zoo:
# dataset name -> model architecture name -> download URL of model_final.pth.
# Looked up by LayoutParserDetectron2ModelHandler for "weight" paths.
MODEL_CATALOG = {
    "HJDataset": {
        "faster_rcnn_R_50_FPN_3x": "https://www.dropbox.com/s/6icw6at8m28a2ho/model_final.pth?dl=1",
        "mask_rcnn_R_50_FPN_3x": "https://www.dropbox.com/s/893paxpy5suvlx9/model_final.pth?dl=1",
        "retinanet_R_50_FPN_3x": "https://www.dropbox.com/s/yxsloxu3djt456i/model_final.pth?dl=1",
    },
    "PubLayNet": {
        "faster_rcnn_R_50_FPN_3x": "https://www.dropbox.com/s/dgy9c10wykk4lq4/model_final.pth?dl=1",
        "mask_rcnn_R_50_FPN_3x": "https://www.dropbox.com/s/d9fc9tahfzyl6df/model_final.pth?dl=1",
        "mask_rcnn_X_101_32x8d_FPN_3x": "https://www.dropbox.com/s/57zjbwv6gh3srry/model_final.pth?dl=1",
    },
    "PrimaLayout": {
        "mask_rcnn_R_50_FPN_3x": "https://www.dropbox.com/s/h7th27jfv19rxiy/model_final.pth?dl=1"
    },
    "NewspaperNavigator": {
        "faster_rcnn_R_50_FPN_3x": "https://www.dropbox.com/s/6ewh6g8rqt2ev3a/model_final.pth?dl=1",
    },
    "TableBank": {
        "faster_rcnn_R_50_FPN_3x": "https://www.dropbox.com/s/8v4uqmz1at9v72a/model_final.pth?dl=1",
        "faster_rcnn_R_101_FPN_3x": "https://www.dropbox.com/s/6vzfk8lk9xvyitg/model_final.pth?dl=1",
    },
    "MFD": {
        "faster_rcnn_R_50_FPN_3x": "https://www.dropbox.com/s/7xel0i3iqpm2p8y/model_final.pth?dl=1",
    },
}
44
+
45
# Configuration files of the Detectron2 model zoo:
# dataset name -> model architecture name -> download URL of the YAML config.
# Uses the same dataset and architecture keys as MODEL_CATALOG; looked up by
# LayoutParserDetectron2ModelHandler for "config" paths.
CONFIG_CATALOG = {
    "HJDataset": {
        "faster_rcnn_R_50_FPN_3x": "https://www.dropbox.com/s/j4yseny2u0hn22r/config.yml?dl=1",
        "mask_rcnn_R_50_FPN_3x": "https://www.dropbox.com/s/4jmr3xanmxmjcf8/config.yml?dl=1",
        "retinanet_R_50_FPN_3x": "https://www.dropbox.com/s/z8a8ywozuyc5c2x/config.yml?dl=1",
    },
    "PubLayNet": {
        "faster_rcnn_R_50_FPN_3x": "https://www.dropbox.com/s/f3b12qc4hc0yh4m/config.yml?dl=1",
        "mask_rcnn_R_50_FPN_3x": "https://www.dropbox.com/s/u9wbsfwz4y0ziki/config.yml?dl=1",
        "mask_rcnn_X_101_32x8d_FPN_3x": "https://www.dropbox.com/s/nau5ut6zgthunil/config.yaml?dl=1",
    },
    "PrimaLayout": {
        "mask_rcnn_R_50_FPN_3x": "https://www.dropbox.com/s/yc92x97k50abynt/config.yaml?dl=1"
    },
    "NewspaperNavigator": {
        "faster_rcnn_R_50_FPN_3x": "https://www.dropbox.com/s/wnido8pk4oubyzr/config.yml?dl=1",
    },
    "TableBank": {
        "faster_rcnn_R_50_FPN_3x": "https://www.dropbox.com/s/7cqle02do7ah7k4/config.yaml?dl=1",
        "faster_rcnn_R_101_FPN_3x": "https://www.dropbox.com/s/h63n6nv51kfl923/config.yaml?dl=1",
    },
    "MFD": {
        "faster_rcnn_R_50_FPN_3x": "https://www.dropbox.com/s/ld9izb95f19369w/config.yaml?dl=1",
    },
}
70
+
71
# fmt: off
# Default label maps per dataset: integer class id -> label string.
# The first id differs between datasets (some start at 0, others at 1).
LABEL_MAP_CATALOG = {
    "HJDataset": {
        1: "Page Frame",
        2: "Row",
        3: "Title Region",
        4: "Text Region",
        5: "Title",
        6: "Subtitle",
        7: "Other",
    },
    "PubLayNet": {
        0: "Text",
        1: "Title",
        2: "List",
        3: "Table",
        4: "Figure"},
    "PrimaLayout": {
        1: "TextRegion",
        2: "ImageRegion",
        3: "TableRegion",
        4: "MathsRegion",
        5: "SeparatorRegion",
        6: "OtherRegion",
    },
    "NewspaperNavigator": {
        0: "Photograph",
        1: "Illustration",
        2: "Map",
        3: "Comics/Cartoon",
        4: "Editorial Cartoon",
        5: "Headline",
        6: "Advertisement",
    },
    "TableBank": {
        0: "Table"
    },
    "MFD": {
        1: "Equation"
    },
}
# fmt: on
113
+
114
+
115
class LayoutParserDetectron2ModelHandler(PathHandler):
    """Path handler for ``lp://detectron2/`` entries of the model zoo.

    A path has the form ``lp://detectron2/<dataset>/<model name>/<data_type>``
    where ``data_type`` is ``weight`` or ``config``.
    """

    PREFIX = "lp://detectron2/"

    def _get_supported_prefixes(self):
        """Return the single prefix this handler serves."""
        return [self.PREFIX]

    def _get_local_path(self, path, **kwargs):
        """Look the path up in the catalogs and fetch the matching URL.

        Raises:
            ValueError: If the last path component is neither ``weight`` nor
                ``config``.
        """
        relative_name = path[len(self.PREFIX) :]
        dataset_name, *arch_parts, data_type = relative_name.split("/")

        # Choose the catalog first; both share the dataset/model key layout.
        if data_type == "weight":
            catalog = MODEL_CATALOG
        elif data_type == "config":
            catalog = CONFIG_CATALOG
        else:
            raise ValueError(f"Unknown data_type {data_type}")

        model_url = catalog[dataset_name]["/".join(arch_parts)]
        return PathManager.get_local_path(model_url, **kwargs)

    def _open(self, path, mode="r", **kwargs):
        """Open the local copy of the catalog entry named by ``path``."""
        local_path = self._get_local_path(path)
        return PathManager.open(local_path, mode, **kwargs)


# Make ``lp://detectron2/`` paths resolvable through the shared PathManager.
PathManager.register_handler(LayoutParserDetectron2ModelHandler())
@@ -0,0 +1,168 @@
1
+ # Copyright 2021 The Layout Parser team. All rights reserved.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ from typing import Union
16
+ from PIL import Image
17
+ import numpy as np
18
+ import warnings
19
+
20
+ from .catalog import MODEL_CATALOG, PathManager, LABEL_MAP_CATALOG
21
+ from ..base_layoutmodel import BaseLayoutModel
22
+ from ...elements import Rectangle, TextBlock, Layout
23
+ from ...file_utils import is_torch_cuda_available, is_detectron2_available
24
+
25
+ if is_detectron2_available():
26
+ import detectron2.engine
27
+ import detectron2.config
28
+
29
+
30
+ __all__ = ["Detectron2LayoutModel"]
31
+
32
+
33
class Detectron2LayoutModel(BaseLayoutModel):
    """Create a Detectron2-based Layout Detection Model

    Args:
        config_path (:obj:`str`):
            The path to the configuration file.
        model_path (:obj:`str`, None):
            The path to the saved weights of the model.
            If set, overwrite the weights in the configuration file.
            Defaults to `None`.
        label_map (:obj:`dict`, optional):
            The map from the model prediction (ids) to real
            word labels (strings). If the config is from one of the supported
            datasets, Layout Parser will automatically initialize the label_map.
            Defaults to `None`.
        device(:obj:`str`, optional):
            Whether to use cuda or cpu devices. If not set, LayoutParser will
            automatically determine the device to initialize the models on.
        extra_config (:obj:`list`, optional):
            Extra configuration passed to the Detectron2 model
            configuration. The argument will be used in the `merge_from_list
            <https://detectron2.readthedocs.io/modules/config.html
            #detectron2.config.CfgNode.merge_from_list>`_ function.
            Defaults to `[]`.

    Examples::
        >>> import layoutparser as lp
        >>> model = lp.Detectron2LayoutModel('lp://HJDataset/faster_rcnn_R_50_FPN_3x/config')
        >>> model.detect(image)

    """

    DEPENDENCIES = ["detectron2"]
    DETECTOR_NAME = "detectron2"
    MODEL_CATALOG = MODEL_CATALOG

    def __init__(
        self,
        config_path,
        model_path=None,
        label_map=None,
        extra_config=None,
        enforce_cpu=None,
        device=None,
    ):
        # NOTE: enforce_cpu only triggers the warning below; its value is not
        # used when the device is selected.
        if enforce_cpu is not None:
            warnings.warn(
                "Setting enforce_cpu is deprecated. Please set `device` instead.",
                DeprecationWarning,
            )

        if extra_config is None:
            extra_config = []

        config_path, model_path = self.config_parser(
            config_path, model_path, allow_empty_path=True
        )

        # The default label map has to be derived from the "lp://" form of the
        # path. Once get_local_path() has resolved it to a local file the
        # prefix is gone and the dataset could no longer be identified.
        if label_map is None:
            label_map = self._default_label_map(config_path)

        config_path = PathManager.get_local_path(config_path)

        cfg = detectron2.config.get_cfg()
        cfg.merge_from_file(config_path)
        cfg.merge_from_list(extra_config)

        if model_path is not None:
            model_path = PathManager.get_local_path(model_path)
            # Because it will be forwarded to the detectron2 paths
            cfg.MODEL.WEIGHTS = model_path

        if is_torch_cuda_available():
            if device is None:
                device = "cuda"
        else:
            # Without CUDA the model always runs on cpu, whatever was asked.
            device = "cpu"
        cfg.MODEL.DEVICE = device

        self.cfg = cfg

        self.label_map = label_map
        self._create_model()

    @staticmethod
    def _default_label_map(config_path):
        """Return the catalog label map for an ``lp://`` path, else ``{}``."""
        lp_prefix = "lp://"
        if not config_path.startswith(lp_prefix):
            return {}
        # Slice the prefix off: str.lstrip("lp://") strips a *set* of
        # characters and could also eat the start of the next component.
        # The dataset is taken from the second component, as before --
        # presumably the first one is the backend name; verify against
        # layout_model_config_parser.
        dataset_name = config_path[len(lp_prefix):].split("/")[1]
        return LABEL_MAP_CATALOG[dataset_name]

    def _create_model(self):
        """Build the Detectron2 predictor from ``self.cfg``."""
        self.model = detectron2.engine.DefaultPredictor(self.cfg)

    def gather_output(self, outputs):
        """Convert the predictor output into a :obj:`~layoutparser.Layout`.

        Each predicted instance becomes a ``TextBlock`` holding its box, its
        score and its class, translated through ``self.label_map``. A class
        id missing from the map is kept as the raw id.
        """
        instance_pred = outputs["instances"].to("cpu")

        layout = Layout()
        scores = instance_pred.scores.tolist()
        boxes = instance_pred.pred_boxes.tensor.tolist()
        labels = instance_pred.pred_classes.tolist()

        for score, box, label in zip(scores, boxes, labels):
            x_1, y_1, x_2, y_2 = box
            label = self.label_map.get(label, label)

            cur_block = TextBlock(
                Rectangle(x_1, y_1, x_2, y_2), type=label, score=score
            )
            layout.append(cur_block)

        return layout

    def detect(self, image):
        """Detect the layout of a given image.

        Args:
            image (:obj:`np.ndarray` or `PIL.Image`): The input image to detect.

        Returns:
            :obj:`~layoutparser.Layout`: The detected layout of the input image
        """

        image = self.image_loader(image)
        outputs = self.model(image)
        layout = self.gather_output(outputs)
        return layout

    def image_loader(self, image: Union["np.ndarray", "Image.Image"]):
        """Convert a PIL image to an RGB array; return other inputs as is."""
        # Convert PIL Image Input
        if isinstance(image, Image.Image):
            if image.mode != "RGB":
                image = image.convert("RGB")
            image = np.array(image)

        return image
@@ -0,0 +1,16 @@
1
+ # Copyright 2021 The Layout Parser team. All rights reserved.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ from . import catalog as _UNUSED
16
+ from .layoutmodel import EfficientDetLayoutModel
@@ -0,0 +1,88 @@
1
+ # Copyright 2021 The Layout Parser team. All rights reserved.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ from iopath.common.file_io import PathHandler
16
+
17
+ from ..base_catalog import PathManager
18
+
19
# EfficientDet model zoo: dataset name -> model name -> URL of the weights.
# NOTE(review): these are "/blob/main/" links, and the handler in this file
# fetches weights through hf_hub_download rather than through these URLs --
# confirm whether anything downloads from them directly.
MODEL_CATALOG = {
    "PubLayNet": {
        "tf_efficientdet_d0": "https://huggingface.co/ClosedUni/publaynet-tf_efficientdet/blob/main/publaynet-tf_efficientdet_d0.pth.tar",
        "tf_efficientdet_d1": "https://huggingface.co/ClosedUni/publaynet-tf_efficientdet/blob/main/publaynet-tf_efficientdet_d1.pth.tar"
    },
    "MFD": {
        "tf_efficientdet_d0": "https://huggingface.co/ClosedUni/mfd-tf_efficientdet/blob/main/mfd-tf_efficientdet_d0.pth.tar",
        "tf_efficientdet_d1": "https://huggingface.co/ClosedUni/mfd-tf_efficientdet/blob/main/mfd-tf_efficientdet_d1.pth.tar"
    }
}
29
+
30
# In effdet training scripts, it requires the label_map starting
# from 1 instead of 0
# Default label maps per dataset: integer class id -> label string.
LABEL_MAP_CATALOG = {
    "PubLayNet": {
        1: "Text",
        2: "Title",
        3: "List",
        4: "Table",
        5: "Figure"
    },
    "MFD": {
        1: "Equation",
    }
}
44
+
45
class LayoutParserEfficientDetModelHandler(PathHandler):
    """Path handler for ``lp://efficientdet/`` entries of the model zoo.

    A path has the form ``lp://efficientdet/<dataset>/<model name>/weight``.
    The weights are downloaded with ``huggingface_hub``.
    """

    PREFIX = "lp://efficientdet/"

    def _get_supported_prefixes(self):
        """Return the single prefix this handler serves."""
        return [self.PREFIX]

    def _get_local_path(self, path, **kwargs):
        """Download the weights named by ``path`` and return the local file.

        Args:
            path: An ``lp://efficientdet/...`` path ending in ``weight``.
            **kwargs: Accepted for interface compatibility; not used.

        Returns:
            The local path returned by ``hf_hub_download``.

        Raises:
            ValueError: If the last path component is not ``weight``.
            ImportError: If ``huggingface_hub`` is not installed.
        """
        model_name = path[len(self.PREFIX) :]
        dataset_name, *model_name, data_type = model_name.split("/")

        if data_type != "weight":
            raise ValueError(f"Unknown data_type {data_type}")

        # Imported lazily so the package itself does not require
        # huggingface_hub unless EfficientDet weights are requested.
        try:
            from huggingface_hub import hf_hub_download
        except ImportError as err:
            raise ImportError(
                "The 'huggingface_hub' package is required to download custom weights. "
                "Please install it via 'pip install huggingface_hub'."
            ) from err

        # Repository and file names both start with the lower-cased dataset
        # name, e.g. "publaynet-tf_efficientdet" holds
        # "publaynet-tf_efficientdet_d0.pth.tar" and "mfd-tf_efficientdet"
        # holds "mfd-tf_efficientdet_d0.pth.tar". The repository used to be
        # fixed to the PubLayNet one, so MFD weights could not be found.
        dataset_slug = dataset_name.lower()
        arch_name = "/".join(model_name)
        return hf_hub_download(
            repo_id=f"ClosedUni/{dataset_slug}-tf_efficientdet",
            filename=f"{dataset_slug}-{arch_name}.pth.tar",
        )

    def _open(self, path, mode="r", **kwargs):
        """Open the local copy of the weights named by ``path``."""
        return PathManager.open(self._get_local_path(path), mode, **kwargs)


# Register the handler into layoutparser's path management system
PathManager.register_handler(LayoutParserEfficientDetModelHandler())