custom-layoutparser 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- custom_layoutparser-0.1.0.dist-info/METADATA +5 -0
- custom_layoutparser-0.1.0.dist-info/RECORD +36 -0
- custom_layoutparser-0.1.0.dist-info/WHEEL +5 -0
- custom_layoutparser-0.1.0.dist-info/top_level.txt +1 -0
- layoutparser/__init__.py +89 -0
- layoutparser/elements/__init__.py +25 -0
- layoutparser/elements/base.py +275 -0
- layoutparser/elements/errors.py +26 -0
- layoutparser/elements/layout.py +348 -0
- layoutparser/elements/layout_elements.py +1352 -0
- layoutparser/elements/utils.py +82 -0
- layoutparser/file_utils.py +235 -0
- layoutparser/io/__init__.py +2 -0
- layoutparser/io/basic.py +148 -0
- layoutparser/io/pdf.py +225 -0
- layoutparser/models/__init__.py +18 -0
- layoutparser/models/auto_layoutmodel.py +70 -0
- layoutparser/models/base_catalog.py +34 -0
- layoutparser/models/base_layoutmodel.py +88 -0
- layoutparser/models/detectron2/__init__.py +18 -0
- layoutparser/models/detectron2/catalog.py +142 -0
- layoutparser/models/detectron2/layoutmodel.py +168 -0
- layoutparser/models/effdet/__init__.py +16 -0
- layoutparser/models/effdet/catalog.py +88 -0
- layoutparser/models/effdet/layoutmodel.py +256 -0
- layoutparser/models/model_config.py +133 -0
- layoutparser/models/paddledetection/__init__.py +17 -0
- layoutparser/models/paddledetection/catalog.py +214 -0
- layoutparser/models/paddledetection/layoutmodel.py +297 -0
- layoutparser/ocr/__init__.py +16 -0
- layoutparser/ocr/base.py +41 -0
- layoutparser/ocr/gcv_agent.py +288 -0
- layoutparser/ocr/tesseract_agent.py +193 -0
- layoutparser/tools/__init__.py +5 -0
- layoutparser/tools/shape_operations.py +167 -0
- layoutparser/visualization.py +571 -0
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
# Copyright 2021 The Layout Parser team. All rights reserved.
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
|
|
15
|
+
from .detectron2.layoutmodel import Detectron2LayoutModel
|
|
16
|
+
from .paddledetection.layoutmodel import PaddleDetectionLayoutModel
|
|
17
|
+
from .effdet.layoutmodel import EfficientDetLayoutModel
|
|
18
|
+
from .auto_layoutmodel import AutoLayoutModel
|
|
@@ -0,0 +1,70 @@
|
|
|
1
|
+
# Copyright 2021 The Layout Parser team. All rights reserved.
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
|
|
15
|
+
from typing import Optional, Dict, Union, List
|
|
16
|
+
from .detectron2.layoutmodel import Detectron2LayoutModel
|
|
17
|
+
from .paddledetection.layoutmodel import PaddleDetectionLayoutModel
|
|
18
|
+
from .effdet.layoutmodel import EfficientDetLayoutModel
|
|
19
|
+
from .model_config import (
|
|
20
|
+
is_lp_layout_model_config_any_format,
|
|
21
|
+
)
|
|
22
|
+
|
|
23
|
+
# Registry of the detection backends known to this module, keyed by each
# backend class's ``DETECTOR_NAME``. The iteration order is the order in which
# ``AutoLayoutModel`` probes a config path for a backend name.
ALL_AVAILABLE_BACKENDS = {
    backend_cls.DETECTOR_NAME: backend_cls
    for backend_cls in (
        Detectron2LayoutModel,
        PaddleDetectionLayoutModel,
        EfficientDetLayoutModel,
    )
}
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
def AutoLayoutModel(
    config_path: str,
    model_path: Optional[str] = None,
    label_map: Optional[Dict] = None,
    device: Optional[str] = None,
    extra_config: Optional[Union[Dict, List]] = None,
) -> "BaseLayoutModel":
    """Create a layout model using the backend named in ``config_path``.

    The backend is chosen by looking for each key of
    ``ALL_AVAILABLE_BACKENDS`` (the backends' ``DETECTOR_NAME``) as a
    substring of ``config_path``; the first match wins.

    Args:
        config_path (:obj:`str`):
            The path to the configuration file.
        model_path (:obj:`str`, None):
            The path to the saved weights of the model.
            If set, overwrite the weights in the configuration file.
            Defaults to `None`.
        label_map (:obj:`dict`, optional):
            The map from the model prediction (ids) to real
            word labels (strings). If the config is from one of the supported
            datasets, Layout Parser will automatically initialize the label_map.
            Defaults to `None`.
        device(:obj:`str`, optional):
            Whether to use cuda or cpu devices. If not set, LayoutParser will
            automatically determine the device to initialize the models on.
        extra_config (:obj:`dict`, optional):
            Extra configuration used for initializing the layout model.

    Returns:
        BaseLayoutModel: the created layout model instance.

    Raises:
        ValueError: If ``config_path`` is rejected by
            ``is_lp_layout_model_config_any_format``, or if it does not
            contain the name of any registered backend.
    """
    if not is_lp_layout_model_config_any_format(config_path):
        raise ValueError(f"Invalid model config_path {config_path}")

    for backend_name, backend_cls in ALL_AVAILABLE_BACKENDS.items():
        if backend_name in config_path:
            return backend_cls(
                config_path,
                model_path=model_path,
                label_map=label_map,
                extra_config=extra_config,
                device=device,
            )

    # Falling off the loop used to return None implicitly, which only surfaced
    # later as an AttributeError on the caller's side; fail loudly here instead.
    raise ValueError(
        f"No available model found for {config_path}: the path does not name "
        f"any of the supported backends {list(ALL_AVAILABLE_BACKENDS)}"
    )
|
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
# Copyright 2021 The Layout Parser team. All rights reserved.
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
|
|
15
|
+
from iopath.common.file_io import HTTPURLHandler
|
|
16
|
+
from iopath.common.file_io import PathManager as PathManagerBase
|
|
17
|
+
|
|
18
|
+
# A trick learned from https://github.com/facebookresearch/detectron2/blob/65faeb4779e4c142484deeece18dc958c5c9ad18/detectron2/utils/file_io.py#L3
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
class DropboxHandler(HTTPURLHandler):
    """
    HTTP URL handler that supports download and file checks for dropbox links.
    """

    def _get_supported_prefixes(self):
        """Return the list of URL prefixes this handler claims."""
        dropbox_prefixes = ["https://www.dropbox.com"]
        return dropbox_prefixes

    def _isfile(self, path):
        """Return True when ``path`` is already a key of ``self.cache_map``."""
        is_known = path in self.cache_map
        return is_known


# Package-wide path manager; the backend catalog modules import this object
# and register their own ``lp://`` handlers on it.
PathManager = PathManagerBase()
PathManager.register_handler(DropboxHandler())
|
|
@@ -0,0 +1,88 @@
|
|
|
1
|
+
# Copyright 2021 The Layout Parser team. All rights reserved.
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
|
|
15
|
+
from typing import Optional, Tuple, Union, Dict
|
|
16
|
+
from abc import ABC, abstractmethod
|
|
17
|
+
|
|
18
|
+
from .model_config import LayoutModelConfig, add_identifier_for_config, layout_model_config_parser, is_lp_layout_model_config_any_format
|
|
19
|
+
from ..file_utils import requires_backends
|
|
20
|
+
|
|
21
|
+
class BaseLayoutModel(ABC):
    """Abstract base shared by the backend-specific layout detection models.

    Subclasses provide the ``DEPENDENCIES``, ``DETECTOR_NAME`` and
    ``MODEL_CATALOG`` attributes plus the ``detect`` and ``image_loader``
    methods; this class contributes the config/weight path resolution and the
    backend availability check performed on instantiation.
    """

    # TODO: Build a metaclass for lazy module loader
    @property
    @abstractmethod
    def DEPENDENCIES(self):
        """DEPENDENCIES lists all necessary dependencies for the class."""

    @property
    @abstractmethod
    def DETECTOR_NAME(self):
        """Name of the backend; passed to ``layout_model_config_parser``."""

    @property
    @abstractmethod
    def MODEL_CATALOG(self) -> Dict[str, Dict[str, str]]:
        """Nested mapping: dataset name -> model name -> string value."""

    @abstractmethod
    def detect(self, image: Union["np.ndarray", "Image.Image"]):
        """Run layout detection on ``image``."""

    @abstractmethod
    def image_loader(self, image: Union["np.ndarray", "Image.Image"]):
        """It will process the input images appropriately to the target format."""

    def _parse_config(self, config_path: str, identifier: str) -> Union[LayoutModelConfig, str]:
        """Turn an ``lp``-style path into a parsed config; pass others through.

        Args:
            config_path: Path to inspect.
            identifier: Value forwarded to ``add_identifier_for_config``
                (this class passes ``"config"`` or ``"weight"``).

        Returns:
            ``config_path`` unchanged when
            ``is_lp_layout_model_config_any_format`` rejects it, otherwise the
            result of ``layout_model_config_parser``.

        Raises:
            ValueError: If no dataset name from ``MODEL_CATALOG`` occurs in
                the path.
        """
        if not is_lp_layout_model_config_any_format(config_path):
            return config_path

        config_path = add_identifier_for_config(config_path, identifier)
        for dataset_name in self.MODEL_CATALOG:
            if dataset_name not in config_path:
                continue
            # Use the first model_name for the dataset as the default_model_arch
            default_model_arch = list(self.MODEL_CATALOG[dataset_name])[0]
            return layout_model_config_parser(
                config_path, self.DETECTOR_NAME, default_model_arch
            )

        raise ValueError(f"The config {config_path} is not a valid config for {self.__class__}, "
                    f"possibly because there aren't models trained for the specified dataset.")

    def config_parser(self, config_path: str, model_path: Optional[str], allow_empty_path=False) -> Tuple[str, str]:
        """Resolve a config path and a weight path into a pair of strings.

        Args:
            config_path: Config location, ``lp``-style or a plain path.
            model_path: Weight location, or ``None``.
            allow_empty_path: When true, a plain ``config_path`` with no
                ``model_path`` is returned as ``(config_path, None)`` instead
                of raising.

        Returns:
            ``(config_path, model_path)``; parsed configs are replaced by
            their ``.full`` attribute.

        Raises:
            ValueError: For a plain ``config_path`` without ``model_path``
                while ``allow_empty_path`` is false.
        """
        config_path = self._parse_config(config_path, "config")
        weights_missing = model_path is None

        if weights_missing and isinstance(config_path, str):
            if allow_empty_path:
                return config_path, model_path
            raise ValueError(
                f"Invalid config and model path pairs ({(config_path, model_path)}):"
                f"When config_path is a regular URL, the model_path should not be empty"
            )

        if weights_missing and isinstance(config_path, LayoutModelConfig):
            # Derive the weight entry from the parsed config itself.
            model_path = config_path.dual()
        else:
            model_path = self._parse_config(model_path, "weight")

        if not isinstance(config_path, str):
            config_path = config_path.full
        if not isinstance(model_path, str):
            model_path = model_path.full
        return config_path, model_path

    def __new__(cls, *args, **kwargs):
        """Check the backend's dependencies before allocating the instance."""
        requires_backends(cls, cls.DEPENDENCIES)
        instance = super().__new__(cls)
        return instance
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
# Copyright 2021 The Layout Parser team. All rights reserved.
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
|
|
15
|
+
from . import catalog as _UNUSED
|
|
16
|
+
# A trick learned from
|
|
17
|
+
# https://github.com/facebookresearch/detectron2/blob/62cf3a2b6840734d2717abdf96e2dd57ed6612a6/detectron2/checkpoint/__init__.py#L6
|
|
18
|
+
from .layoutmodel import Detectron2LayoutModel
|
|
@@ -0,0 +1,142 @@
|
|
|
1
|
+
# Copyright 2021 The Layout Parser team. All rights reserved.
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
|
|
15
|
+
from iopath.common.file_io import PathHandler
|
|
16
|
+
|
|
17
|
+
from ..base_catalog import PathManager
|
|
18
|
+
|
|
19
|
+
# Weight download URLs for the Detectron2 backend: dataset name -> model
# architecture name -> URL of the ``model_final.pth`` file.
MODEL_CATALOG = dict(
    HJDataset=dict(
        faster_rcnn_R_50_FPN_3x="https://www.dropbox.com/s/6icw6at8m28a2ho/model_final.pth?dl=1",
        mask_rcnn_R_50_FPN_3x="https://www.dropbox.com/s/893paxpy5suvlx9/model_final.pth?dl=1",
        retinanet_R_50_FPN_3x="https://www.dropbox.com/s/yxsloxu3djt456i/model_final.pth?dl=1",
    ),
    PubLayNet=dict(
        faster_rcnn_R_50_FPN_3x="https://www.dropbox.com/s/dgy9c10wykk4lq4/model_final.pth?dl=1",
        mask_rcnn_R_50_FPN_3x="https://www.dropbox.com/s/d9fc9tahfzyl6df/model_final.pth?dl=1",
        mask_rcnn_X_101_32x8d_FPN_3x="https://www.dropbox.com/s/57zjbwv6gh3srry/model_final.pth?dl=1",
    ),
    PrimaLayout=dict(
        mask_rcnn_R_50_FPN_3x="https://www.dropbox.com/s/h7th27jfv19rxiy/model_final.pth?dl=1",
    ),
    NewspaperNavigator=dict(
        faster_rcnn_R_50_FPN_3x="https://www.dropbox.com/s/6ewh6g8rqt2ev3a/model_final.pth?dl=1",
    ),
    TableBank=dict(
        faster_rcnn_R_50_FPN_3x="https://www.dropbox.com/s/8v4uqmz1at9v72a/model_final.pth?dl=1",
        faster_rcnn_R_101_FPN_3x="https://www.dropbox.com/s/6vzfk8lk9xvyitg/model_final.pth?dl=1",
    ),
    MFD=dict(
        faster_rcnn_R_50_FPN_3x="https://www.dropbox.com/s/7xel0i3iqpm2p8y/model_final.pth?dl=1",
    ),
)
|
|
44
|
+
|
|
45
|
+
# Config download URLs for the Detectron2 backend: dataset name -> model
# architecture name -> URL of the YAML config (some entries end in
# ``config.yml``, others in ``config.yaml``).
CONFIG_CATALOG = dict(
    HJDataset=dict(
        faster_rcnn_R_50_FPN_3x="https://www.dropbox.com/s/j4yseny2u0hn22r/config.yml?dl=1",
        mask_rcnn_R_50_FPN_3x="https://www.dropbox.com/s/4jmr3xanmxmjcf8/config.yml?dl=1",
        retinanet_R_50_FPN_3x="https://www.dropbox.com/s/z8a8ywozuyc5c2x/config.yml?dl=1",
    ),
    PubLayNet=dict(
        faster_rcnn_R_50_FPN_3x="https://www.dropbox.com/s/f3b12qc4hc0yh4m/config.yml?dl=1",
        mask_rcnn_R_50_FPN_3x="https://www.dropbox.com/s/u9wbsfwz4y0ziki/config.yml?dl=1",
        mask_rcnn_X_101_32x8d_FPN_3x="https://www.dropbox.com/s/nau5ut6zgthunil/config.yaml?dl=1",
    ),
    PrimaLayout=dict(
        mask_rcnn_R_50_FPN_3x="https://www.dropbox.com/s/yc92x97k50abynt/config.yaml?dl=1",
    ),
    NewspaperNavigator=dict(
        faster_rcnn_R_50_FPN_3x="https://www.dropbox.com/s/wnido8pk4oubyzr/config.yml?dl=1",
    ),
    TableBank=dict(
        faster_rcnn_R_50_FPN_3x="https://www.dropbox.com/s/7cqle02do7ah7k4/config.yaml?dl=1",
        faster_rcnn_R_101_FPN_3x="https://www.dropbox.com/s/h63n6nv51kfl923/config.yaml?dl=1",
    ),
    MFD=dict(
        faster_rcnn_R_50_FPN_3x="https://www.dropbox.com/s/ld9izb95f19369w/config.yaml?dl=1",
    ),
)
|
|
70
|
+
|
|
71
|
+
# fmt: off
|
|
72
|
+
# Default label maps for the Detectron2 backend: dataset name -> {id: label}.
# Ids within a dataset are consecutive; note that the first id is 1 for some
# datasets and 0 for others, hence the explicit ``start`` where it is not 0.
LABEL_MAP_CATALOG = {
    "HJDataset": dict(enumerate(
        ["Page Frame", "Row", "Title Region", "Text Region", "Title", "Subtitle", "Other"],
        start=1,
    )),
    "PubLayNet": dict(enumerate(
        ["Text", "Title", "List", "Table", "Figure"],
    )),
    "PrimaLayout": dict(enumerate(
        ["TextRegion", "ImageRegion", "TableRegion", "MathsRegion", "SeparatorRegion", "OtherRegion"],
        start=1,
    )),
    "NewspaperNavigator": dict(enumerate(
        ["Photograph", "Illustration", "Map", "Comics/Cartoon", "Editorial Cartoon", "Headline", "Advertisement"],
    )),
    "TableBank": dict(enumerate(
        ["Table"],
    )),
    "MFD": dict(enumerate(
        ["Equation"],
        start=1,
    )),
}
|
|
112
|
+
# fmt: on
|
|
113
|
+
|
|
114
|
+
|
|
115
|
+
class LayoutParserDetectron2ModelHandler(PathHandler):
    """
    Resolve ``lp://detectron2/`` paths against the LayoutParser model zoo.
    """

    PREFIX = "lp://detectron2/"

    def _get_supported_prefixes(self):
        """Return the single prefix this handler claims."""
        return [self.PREFIX]

    def _get_local_path(self, path, **kwargs):
        """Map ``<PREFIX><dataset>/<model...>/<weight|config>`` to a local file.

        The last path segment selects the catalog (``"weight"`` ->
        ``MODEL_CATALOG``, ``"config"`` -> ``CONFIG_CATALOG``); the URL found
        there is handed to ``PathManager.get_local_path`` together with
        ``kwargs``.

        Raises:
            ValueError: If the last segment is neither ``"weight"`` nor
                ``"config"``.
        """
        relative = path[len(self.PREFIX) :]
        dataset_name, *arch_parts, data_type = relative.split("/")

        catalogs = {"weight": MODEL_CATALOG, "config": CONFIG_CATALOG}
        if data_type not in catalogs:
            raise ValueError(f"Unknown data_type {data_type}")

        model_url = catalogs[data_type][dataset_name]["/".join(arch_parts)]
        return PathManager.get_local_path(model_url, **kwargs)

    def _open(self, path, mode="r", **kwargs):
        """Resolve ``path`` locally, then open it through ``PathManager``."""
        local_path = self._get_local_path(path)
        return PathManager.open(local_path, mode, **kwargs)


# Make ``lp://detectron2/`` paths resolvable through the shared PathManager.
PathManager.register_handler(LayoutParserDetectron2ModelHandler())
|
|
@@ -0,0 +1,168 @@
|
|
|
1
|
+
# Copyright 2021 The Layout Parser team. All rights reserved.
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
|
|
15
|
+
from typing import Union
|
|
16
|
+
from PIL import Image
|
|
17
|
+
import numpy as np
|
|
18
|
+
import warnings
|
|
19
|
+
|
|
20
|
+
from .catalog import MODEL_CATALOG, PathManager, LABEL_MAP_CATALOG
|
|
21
|
+
from ..base_layoutmodel import BaseLayoutModel
|
|
22
|
+
from ...elements import Rectangle, TextBlock, Layout
|
|
23
|
+
from ...file_utils import is_torch_cuda_available, is_detectron2_available
|
|
24
|
+
|
|
25
|
+
if is_detectron2_available():
|
|
26
|
+
import detectron2.engine
|
|
27
|
+
import detectron2.config
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
__all__ = ["Detectron2LayoutModel"]
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
class Detectron2LayoutModel(BaseLayoutModel):
    """Create a Detectron2-based Layout Detection Model

    Args:
        config_path (:obj:`str`):
            The path to the configuration file.
        model_path (:obj:`str`, None):
            The path to the saved weights of the model.
            If set, overwrite the weights in the configuration file.
            Defaults to `None`.
        label_map (:obj:`dict`, optional):
            The map from the model prediction (ids) to real
            word labels (strings). If the config is from one of the supported
            datasets, Layout Parser will automatically initialize the label_map.
            Defaults to `None`.
        device(:obj:`str`, optional):
            Whether to use cuda or cpu devices. If not set, LayoutParser will
            automatically determine the device to initialize the models on.
            When CUDA is not available the model is always placed on cpu.
        extra_config (:obj:`list`, optional):
            Extra configuration passed to the Detectron2 model
            configuration. The argument will be used in the `merge_from_list
            <https://detectron2.readthedocs.io/modules/config.html
            #detectron2.config.CfgNode.merge_from_list>`_ function.
            Defaults to `[]`.
        enforce_cpu (optional):
            Deprecated. Any non-``None`` value only triggers a
            ``DeprecationWarning``; use ``device`` instead.

    Examples::
        >>> import layoutparser as lp
        >>> model = lp.Detectron2LayoutModel('lp://HJDataset/faster_rcnn_R_50_FPN_3x/config')
        >>> model.detect(image)

    """

    DEPENDENCIES = ["detectron2"]
    DETECTOR_NAME = "detectron2"
    MODEL_CATALOG = MODEL_CATALOG

    def __init__(
        self,
        config_path,
        model_path=None,
        label_map=None,
        extra_config=None,
        enforce_cpu=None,
        device=None,
    ):

        if enforce_cpu is not None:
            warnings.warn(
                "Setting enforce_cpu is deprecated. Please set `device` instead.",
                DeprecationWarning,
            )

        if extra_config is None:
            extra_config = []

        config_path, model_path = self.config_parser(
            config_path, model_path, allow_empty_path=True
        )

        # The default label map must be looked up while config_path is still
        # the ``lp://`` string: once PathManager has resolved it to a local
        # file the dataset name can no longer be read from it.
        if label_map is None:
            label_map = self._default_label_map(config_path)

        config_path = PathManager.get_local_path(config_path)

        cfg = detectron2.config.get_cfg()
        cfg.merge_from_file(config_path)
        cfg.merge_from_list(extra_config)

        if model_path is not None:
            model_path = PathManager.get_local_path(model_path)
            # Because it will be forwarded to the detectron2 paths
            cfg.MODEL.WEIGHTS = model_path

        # Without CUDA the model always runs on cpu; with CUDA an explicit
        # ``device`` is honoured and only a missing one defaults to cuda.
        if not is_torch_cuda_available():
            device = "cpu"
        elif device is None:
            device = "cuda"
        cfg.MODEL.DEVICE = device

        self.cfg = cfg

        self.label_map = label_map
        self._create_model()

    @staticmethod
    def _default_label_map(config_path):
        """Return the catalog label map for an ``lp://`` config, else ``{}``.

        The dataset name is taken from the second segment after ``lp://``,
        matching the ``lp://detectron2/<dataset>/...`` layout implied by the
        ``lp://detectron2/`` handler prefix (assumes the resolved config
        string has that shape; TODO confirm against ``LayoutModelConfig``).
        """
        lp_prefix = "lp://"
        if not config_path.startswith(lp_prefix):
            return {}

        # Slice the prefix off; ``str.lstrip("lp://")`` would strip any run of
        # the characters 'l', 'p', ':' and '/', not the literal prefix.
        segments = config_path[len(lp_prefix):].split("/")
        if len(segments) < 2:
            return {}

        # Copy so that per-instance edits never leak into the shared catalog.
        return dict(LABEL_MAP_CATALOG.get(segments[1], {}))

    def _create_model(self):
        """Build the Detectron2 predictor from ``self.cfg``."""
        self.model = detectron2.engine.DefaultPredictor(self.cfg)

    def gather_output(self, outputs):
        """Convert the predictor output into a ``Layout`` of ``TextBlock``s.

        Reads ``scores``, ``pred_boxes`` and ``pred_classes`` from
        ``outputs["instances"]`` (moved to cpu first). Class ids without an
        entry in ``self.label_map`` are kept as the raw id.
        """
        instance_pred = outputs["instances"].to("cpu")

        layout = Layout()
        scores = instance_pred.scores.tolist()
        boxes = instance_pred.pred_boxes.tensor.tolist()
        labels = instance_pred.pred_classes.tolist()

        for score, box, label in zip(scores, boxes, labels):
            x_1, y_1, x_2, y_2 = box

            label = self.label_map.get(label, label)

            cur_block = TextBlock(
                Rectangle(x_1, y_1, x_2, y_2), type=label, score=score
            )
            layout.append(cur_block)

        return layout

    def detect(self, image):
        """Detect the layout of a given image.

        Args:
            image (:obj:`np.ndarray` or `PIL.Image`): The input image to detect.

        Returns:
            :obj:`~layoutparser.Layout`: The detected layout of the input image
        """

        image = self.image_loader(image)
        outputs = self.model(image)
        layout = self.gather_output(outputs)
        return layout

    def image_loader(self, image: Union["np.ndarray", "Image.Image"]):
        """Return ``image`` as an array; PIL images are converted to RGB first.

        Inputs that are not PIL images are returned unchanged.
        """
        # Convert PIL Image Input
        if isinstance(image, Image.Image):
            if image.mode != "RGB":
                image = image.convert("RGB")
            image = np.array(image)

        return image
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
# Copyright 2021 The Layout Parser team. All rights reserved.
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
|
|
15
|
+
from . import catalog as _UNUSED
|
|
16
|
+
from .layoutmodel import EfficientDetLayoutModel
|
|
@@ -0,0 +1,88 @@
|
|
|
1
|
+
# Copyright 2021 The Layout Parser team. All rights reserved.
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
|
|
15
|
+
from iopath.common.file_io import PathHandler
|
|
16
|
+
|
|
17
|
+
from ..base_catalog import PathManager
|
|
18
|
+
|
|
19
|
+
# Weight locations for the EfficientDet backend: dataset name -> model name ->
# URL of the checkpoint file on the Hugging Face hub.
MODEL_CATALOG = dict(
    PubLayNet=dict(
        tf_efficientdet_d0="https://huggingface.co/ClosedUni/publaynet-tf_efficientdet/blob/main/publaynet-tf_efficientdet_d0.pth.tar",
        tf_efficientdet_d1="https://huggingface.co/ClosedUni/publaynet-tf_efficientdet/blob/main/publaynet-tf_efficientdet_d1.pth.tar",
    ),
    MFD=dict(
        tf_efficientdet_d0="https://huggingface.co/ClosedUni/mfd-tf_efficientdet/blob/main/mfd-tf_efficientdet_d0.pth.tar",
        tf_efficientdet_d1="https://huggingface.co/ClosedUni/mfd-tf_efficientdet/blob/main/mfd-tf_efficientdet_d1.pth.tar",
    ),
)
|
|
29
|
+
|
|
30
|
+
# The effdet training scripts require label ids to start from 1 instead of 0,
# so every dataset's label map below is numbered from 1.
LABEL_MAP_CATALOG = {
    "PubLayNet": dict(enumerate(
        ["Text", "Title", "List", "Table", "Figure"],
        start=1,
    )),
    "MFD": dict(enumerate(
        ["Equation"],
        start=1,
    )),
}
|
|
44
|
+
|
|
45
|
+
class LayoutParserEfficientDetModelHandler(PathHandler):
    """
    Resolve ``lp://efficientdet/`` weight paths against the LayoutParser
    model zoo, downloading the checkpoint through ``huggingface_hub``.
    """

    PREFIX = "lp://efficientdet/"

    def _get_supported_prefixes(self):
        """Return the single prefix this handler claims."""
        return [self.PREFIX]

    def _get_local_path(self, path, **kwargs):
        """Map ``<PREFIX><dataset>/<model...>/weight`` to a local checkpoint.

        The repository and file name are read from the ``MODEL_CATALOG``
        entry for the requested dataset and model, so each dataset is
        fetched from the repository its catalog URL points to.

        Args:
            path: Path starting with ``PREFIX``.
            **kwargs: Accepted for interface compatibility; not used.

        Returns:
            The local path returned by ``hf_hub_download``.

        Raises:
            ValueError: If the last path segment is not ``"weight"``.
            ImportError: If ``huggingface_hub`` is not installed.
            KeyError: If the dataset or model is not in ``MODEL_CATALOG``.
        """
        model_name = path[len(self.PREFIX) :]
        dataset_name, *model_name, data_type = model_name.split("/")

        if data_type != "weight":
            raise ValueError(f"Unknown data_type {data_type}")

        # Import lazily so that the package stays importable without
        # huggingface_hub; it is only needed when weights are fetched.
        try:
            from huggingface_hub import hf_hub_download
        except ImportError as err:
            raise ImportError(
                "The 'huggingface_hub' package is required to download custom weights. "
                "Please install it via 'pip install huggingface_hub'."
            ) from err

        model_url = MODEL_CATALOG[dataset_name]["/".join(model_name)]
        repo_id, revision, filename = self._split_hub_url(model_url)

        # NOTE(review): the checkpoint comes from a third-party Hugging Face
        # repository and is not integrity-checked here; the catalog pins a
        # branch name rather than a commit. Consider pinning commit hashes in
        # MODEL_CATALOG before these files are deserialized.
        return hf_hub_download(
            repo_id=repo_id,
            filename=filename,
            revision=revision,
        )

    @staticmethod
    def _split_hub_url(model_url):
        """Split a ``.../<owner>/<repo>/blob/<revision>/<file>`` hub URL.

        Returns:
            ``(repo_id, revision, filename)`` with ``repo_id`` being
            ``"<owner>/<repo>"``.
        """
        from urllib.parse import urlsplit

        owner, repo, _kind, revision, *file_parts = (
            urlsplit(model_url).path.strip("/").split("/")
        )
        return f"{owner}/{repo}", revision, "/".join(file_parts)

    def _open(self, path, mode="r", **kwargs):
        """Resolve ``path`` locally, then open it through ``PathManager``."""
        return PathManager.open(self._get_local_path(path), mode, **kwargs)


# Register the handler into layoutparser's path management system
PathManager.register_handler(LayoutParserEfficientDetModelHandler())
|