custom-layoutparser 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (39) hide show
  1. custom_layoutparser-0.1.0/PKG-INFO +5 -0
  2. custom_layoutparser-0.1.0/custom_layoutparser.egg-info/PKG-INFO +5 -0
  3. custom_layoutparser-0.1.0/custom_layoutparser.egg-info/SOURCES.txt +37 -0
  4. custom_layoutparser-0.1.0/custom_layoutparser.egg-info/dependency_links.txt +1 -0
  5. custom_layoutparser-0.1.0/custom_layoutparser.egg-info/top_level.txt +1 -0
  6. custom_layoutparser-0.1.0/layoutparser/__init__.py +89 -0
  7. custom_layoutparser-0.1.0/layoutparser/elements/__init__.py +25 -0
  8. custom_layoutparser-0.1.0/layoutparser/elements/base.py +275 -0
  9. custom_layoutparser-0.1.0/layoutparser/elements/errors.py +26 -0
  10. custom_layoutparser-0.1.0/layoutparser/elements/layout.py +348 -0
  11. custom_layoutparser-0.1.0/layoutparser/elements/layout_elements.py +1352 -0
  12. custom_layoutparser-0.1.0/layoutparser/elements/utils.py +82 -0
  13. custom_layoutparser-0.1.0/layoutparser/file_utils.py +235 -0
  14. custom_layoutparser-0.1.0/layoutparser/io/__init__.py +2 -0
  15. custom_layoutparser-0.1.0/layoutparser/io/basic.py +148 -0
  16. custom_layoutparser-0.1.0/layoutparser/io/pdf.py +225 -0
  17. custom_layoutparser-0.1.0/layoutparser/models/__init__.py +18 -0
  18. custom_layoutparser-0.1.0/layoutparser/models/auto_layoutmodel.py +70 -0
  19. custom_layoutparser-0.1.0/layoutparser/models/base_catalog.py +34 -0
  20. custom_layoutparser-0.1.0/layoutparser/models/base_layoutmodel.py +88 -0
  21. custom_layoutparser-0.1.0/layoutparser/models/detectron2/__init__.py +18 -0
  22. custom_layoutparser-0.1.0/layoutparser/models/detectron2/catalog.py +142 -0
  23. custom_layoutparser-0.1.0/layoutparser/models/detectron2/layoutmodel.py +168 -0
  24. custom_layoutparser-0.1.0/layoutparser/models/effdet/__init__.py +16 -0
  25. custom_layoutparser-0.1.0/layoutparser/models/effdet/catalog.py +88 -0
  26. custom_layoutparser-0.1.0/layoutparser/models/effdet/layoutmodel.py +256 -0
  27. custom_layoutparser-0.1.0/layoutparser/models/model_config.py +133 -0
  28. custom_layoutparser-0.1.0/layoutparser/models/paddledetection/__init__.py +17 -0
  29. custom_layoutparser-0.1.0/layoutparser/models/paddledetection/catalog.py +214 -0
  30. custom_layoutparser-0.1.0/layoutparser/models/paddledetection/layoutmodel.py +297 -0
  31. custom_layoutparser-0.1.0/layoutparser/ocr/__init__.py +16 -0
  32. custom_layoutparser-0.1.0/layoutparser/ocr/base.py +41 -0
  33. custom_layoutparser-0.1.0/layoutparser/ocr/gcv_agent.py +288 -0
  34. custom_layoutparser-0.1.0/layoutparser/ocr/tesseract_agent.py +193 -0
  35. custom_layoutparser-0.1.0/layoutparser/tools/__init__.py +5 -0
  36. custom_layoutparser-0.1.0/layoutparser/tools/shape_operations.py +167 -0
  37. custom_layoutparser-0.1.0/layoutparser/visualization.py +571 -0
  38. custom_layoutparser-0.1.0/setup.cfg +4 -0
  39. custom_layoutparser-0.1.0/setup.py +9 -0
@@ -0,0 +1,5 @@
1
+ Metadata-Version: 2.4
2
+ Name: custom-layoutparser
3
+ Version: 0.1.0
4
+ Requires-Python: >=3.8
5
+ Dynamic: requires-python
@@ -0,0 +1,5 @@
1
+ Metadata-Version: 2.4
2
+ Name: custom-layoutparser
3
+ Version: 0.1.0
4
+ Requires-Python: >=3.8
5
+ Dynamic: requires-python
@@ -0,0 +1,37 @@
1
+ setup.py
2
+ custom_layoutparser.egg-info/PKG-INFO
3
+ custom_layoutparser.egg-info/SOURCES.txt
4
+ custom_layoutparser.egg-info/dependency_links.txt
5
+ custom_layoutparser.egg-info/top_level.txt
6
+ layoutparser/__init__.py
7
+ layoutparser/file_utils.py
8
+ layoutparser/visualization.py
9
+ layoutparser/elements/__init__.py
10
+ layoutparser/elements/base.py
11
+ layoutparser/elements/errors.py
12
+ layoutparser/elements/layout.py
13
+ layoutparser/elements/layout_elements.py
14
+ layoutparser/elements/utils.py
15
+ layoutparser/io/__init__.py
16
+ layoutparser/io/basic.py
17
+ layoutparser/io/pdf.py
18
+ layoutparser/models/__init__.py
19
+ layoutparser/models/auto_layoutmodel.py
20
+ layoutparser/models/base_catalog.py
21
+ layoutparser/models/base_layoutmodel.py
22
+ layoutparser/models/model_config.py
23
+ layoutparser/models/detectron2/__init__.py
24
+ layoutparser/models/detectron2/catalog.py
25
+ layoutparser/models/detectron2/layoutmodel.py
26
+ layoutparser/models/effdet/__init__.py
27
+ layoutparser/models/effdet/catalog.py
28
+ layoutparser/models/effdet/layoutmodel.py
29
+ layoutparser/models/paddledetection/__init__.py
30
+ layoutparser/models/paddledetection/catalog.py
31
+ layoutparser/models/paddledetection/layoutmodel.py
32
+ layoutparser/ocr/__init__.py
33
+ layoutparser/ocr/base.py
34
+ layoutparser/ocr/gcv_agent.py
35
+ layoutparser/ocr/tesseract_agent.py
36
+ layoutparser/tools/__init__.py
37
+ layoutparser/tools/shape_operations.py
@@ -0,0 +1,89 @@
1
+ # Copyright 2021 The Layout Parser team. All rights reserved.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ __version__ = "0.3.4"
16
+
17
+ import sys
18
+
19
+ from .file_utils import (
20
+ _LazyModule,
21
+ is_detectron2_available,
22
+ is_paddle_available,
23
+ is_effdet_available,
24
+ is_pytesseract_available,
25
+ is_gcv_available,
26
+ )
27
+
28
# Table of submodule name -> public names re-exported at package level.
# It is handed to ``_LazyModule`` at the bottom of this module.
# NOTE(review): presumably each submodule is only imported when one of its
# names is first accessed -- confirm against ``file_utils._LazyModule``.
_import_structure = {
    "elements": [
        "Interval",
        "Rectangle",
        "Quadrilateral",
        "TextBlock",
        "Layout",
    ],
    "visualization": [
        "draw_box",
        "draw_text",
    ],
    "io": [
        "load_json",
        "load_dict",
        "load_csv",
        "load_dataframe",
        "load_pdf",
    ],
    "file_utils": [
        "is_torch_available",
        "is_torch_cuda_available",
        "is_detectron2_available",
        "is_paddle_available",
        # Exported like the other backend probes: this module already imports
        # it from ``file_utils`` and uses it to gate the effdet registration.
        "is_effdet_available",
        "is_pytesseract_available",
        "is_gcv_available",
        "requires_backends",
    ],
    "tools": [
        "generalized_connected_component_analysis_1d",
        "simple_line_detection",
        "group_textblocks_based_on_category",
    ],
}

# ``AutoLayoutModel`` is always registered; the backend-specific models and
# OCR agents below are only registered when their availability probe succeeds.
_import_structure["models"] = ["AutoLayoutModel"]

if is_detectron2_available():
    _import_structure["models.detectron2"] = ["Detectron2LayoutModel"]

if is_paddle_available():
    _import_structure["models.paddledetection"] = ["PaddleDetectionLayoutModel"]

if is_effdet_available():
    _import_structure["models.effdet"] = ["EfficientDetLayoutModel"]

if is_pytesseract_available():
    _import_structure["ocr.tesseract_agent"] = [
        "TesseractAgent",
        "TesseractFeatureType",
    ]

if is_gcv_available():
    _import_structure["ocr.gcv_agent"] = ["GCVAgent", "GCVFeatureType"]

# Replace this module object in ``sys.modules`` with the lazy proxy, keeping
# ``__version__`` available on it through ``extra_objects``.
sys.modules[__name__] = _LazyModule(
    __name__,
    globals()["__file__"],
    _import_structure,
    module_spec=__spec__,
    extra_objects={"__version__": __version__},
)
@@ -0,0 +1,25 @@
1
+ # Copyright 2021 The Layout Parser team. All rights reserved.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ from .base import BaseCoordElement, BaseLayoutElement
16
+ from .layout_elements import (
17
+ Interval,
18
+ Rectangle,
19
+ Quadrilateral,
20
+ TextBlock,
21
+ ALL_BASECOORD_ELEMENTS,
22
+ BASECOORD_ELEMENT_NAMEMAP,
23
+ BASECOORD_ELEMENT_INDEXMAP,
24
+ )
25
+ from .layout import Layout
@@ -0,0 +1,275 @@
1
+ # Copyright 2021 The Layout Parser team. All rights reserved.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ from typing import List, Dict, Dict, Any
16
+ from abc import ABC, abstractmethod
17
+ from copy import copy
18
+
19
class BaseLayoutElement:
    """Base class for layout elements whose state lives in instance attributes.

    Attribute updates (:meth:`set`), the textual representation and equality
    are all derived from ``vars(self)``.
    """

    def set(self, inplace=False, **kwargs):
        """Update attributes by name and return the updated object.

        Each keyword is matched against the instance attributes, first by its
        exact name and then with a leading underscore (``foo`` -> ``_foo``).

        Args:
            inplace (:obj:`bool`, `optional`, defaults to `False`):
                When true, ``self`` is modified and returned; otherwise the
                changes are applied to a shallow copy and ``self`` is untouched.
            **kwargs:
                Attribute names and the new values to assign.

        Raises:
            ValueError: If a keyword matches no attribute. Every name is
                validated before anything is written, so a failing call never
                leaves the object partially updated.

        Returns:
            The updated object (``self`` when ``inplace`` is true).
        """

        obj = self if inplace else copy(self)
        var_dict = vars(obj)

        # Pass 1: resolve every keyword to a concrete attribute name. Nothing
        # is written yet, so an unknown name aborts without side effects.
        updates = []
        for key, val in kwargs.items():
            if key in var_dict:
                updates.append((key, val))
            elif f"_{key}" in var_dict:
                updates.append((f"_{key}", val))
            else:
                raise ValueError(f"Unknown attribute name: {key}")

        # Pass 2: apply the updates in the order the keywords were given.
        for attr, val in updates:
            var_dict[attr] = val

        return obj

    def __repr__(self):
        """Return ``ClassName(attr=value, ...)`` built from ``vars(self)``."""

        info_str = ", ".join([f"{key}={val}" for key, val in vars(self).items()])
        return f"{self.__class__.__name__}({info_str})"

    def __eq__(self, other):
        """Return True only for an object of the exact same class with equal attributes.

        Note: Python sets ``__hash__`` to ``None`` for a class that defines
        ``__eq__`` without ``__hash__``, so instances are unhashable unless a
        subclass defines ``__hash__`` itself.
        """

        if other.__class__ is not self.__class__:
            return False

        return vars(self) == vars(other)
45
+
46
+
47
class BaseCoordElement(ABC, BaseLayoutElement):
    """Abstract interface for coordinate-based layout elements.

    Concrete subclasses provide ``_name`` and ``_features`` together with the
    geometric properties and operations declared below. ``to_dict`` and
    ``from_dict`` are implemented here on top of ``_name`` and ``_features``.
    """

    @property
    @abstractmethod
    def _name(self) -> str:
        """The name of the class"""
        pass

    @property
    @abstractmethod
    def _features(self) -> List[str]:
        """A list of features names used for initializing the class object"""
        pass

    #######################################################################
    #########################  Layout Properties  #########################
    #######################################################################

    @property
    @abstractmethod
    def width(self):
        pass

    @property
    @abstractmethod
    def height(self):
        pass

    @property
    @abstractmethod
    def coordinates(self):
        pass

    @property
    @abstractmethod
    def points(self):
        pass

    @property
    @abstractmethod
    def area(self):
        pass

    #######################################################################
    ### Geometric Relations (relative to, condition on, and is in)  ###
    #######################################################################

    @abstractmethod
    def condition_on(self, other):
        """
        Given the current element in relative coordinates to another element which is in absolute coordinates,
        generate a new element of the current element in absolute coordinates.

        Args:
            other (:obj:`BaseCoordElement`):
                The other layout element involved in the geometric operations.

        Raises:
            Exception: Raise error when the input type of the other element is invalid.

        Returns:
            :obj:`BaseCoordElement`:
                The BaseCoordElement object of the original element in the absolute coordinate system.
        """

        pass

    @abstractmethod
    def relative_to(self, other):
        """
        Given the current element and another element both in absolute coordinates,
        generate a new element of the current element in relative coordinates to the other element.

        Args:
            other (:obj:`BaseCoordElement`): The other layout element involved in the geometric operations.

        Raises:
            Exception: Raise error when the input type of the other element is invalid.

        Returns:
            :obj:`BaseCoordElement`:
                The BaseCoordElement object of the original element in the relative coordinate system.
        """

        pass

    # The ``{}`` default is kept for signature compatibility with the
    # documented default; this abstract body never mutates it.
    @abstractmethod
    def is_in(self, other, soft_margin={}, center=False):
        """
        Identify whether the current element is within another element.

        Args:
            other (:obj:`BaseCoordElement`):
                The other layout element involved in the geometric operations.
            soft_margin (:obj:`dict`, `optional`, defaults to `{}`):
                Enlarge the other element with wider margins to relax the restrictions.
            center (:obj:`bool`, `optional`, defaults to `False`):
                The toggle to determine whether the center (instead of the four corners)
                of the current element is in the other element.

        Returns:
            :obj:`bool`: Returns `True` if the current element is in the other element and `False` if not.
        """

        pass

    #######################################################################
    ################# Shape Operations (intersect, union)  ################
    #######################################################################

    @abstractmethod
    def intersect(self, other: "BaseCoordElement", strict: bool = True):
        """Intersect the current shape with the other object, with operations defined in
        :doc:`../notes/shape_operations`.
        """

    @abstractmethod
    def union(self, other: "BaseCoordElement", strict: bool = True):
        """Union the current shape with the other object, with operations defined in
        :doc:`../notes/shape_operations`.
        """

    #######################################################################
    ############### Geometric Operations (pad, shift, scale) ##############
    #######################################################################

    @abstractmethod
    def pad(self, left=0, right=0, top=0, bottom=0, safe_mode=True):
        """Pad the layout element on the four sides of the polygon with the user-defined pixels. If
        safe_mode is set to True, the function will cut off the excess padding that falls on the negative
        side of the coordinates.

        Args:
            left (:obj:`int`, `optional`, defaults to 0): The number of pixels to pad on the left side of the polygon.
            right (:obj:`int`, `optional`, defaults to 0): The number of pixels to pad on the right side of the polygon.
            top (:obj:`int`, `optional`, defaults to 0): The number of pixels to pad on the upper side of the polygon.
            bottom (:obj:`int`, `optional`, defaults to 0): The number of pixels to pad on the lower side of the polygon.
            safe_mode (:obj:`bool`, `optional`, defaults to True): A bool value to toggle the safe_mode.

        Returns:
            :obj:`BaseCoordElement`: The padded BaseCoordElement object.
        """

        pass

    @abstractmethod
    def shift(self, shift_distance=0):
        """
        Shift the layout element by user specified amounts on x and y axis respectively. If shift_distance is one
        numeric value, the element will be shifted by the same specified amount on both x and y axis.

        Args:
            shift_distance (:obj:`numeric` or :obj:`Tuple(numeric)` or :obj:`List[numeric]`):
                The number of pixels used to shift the element.

        Returns:
            :obj:`BaseCoordElement`: The shifted BaseCoordElement of the same shape-specific class.
        """

        pass

    @abstractmethod
    def scale(self, scale_factor=1):
        """
        Scale the layout element by a user specified amount on x and y axis respectively. If scale_factor is one
        numeric value, the element will be scaled by the same specified amount on both x and y axis.

        Args:
            scale_factor (:obj:`numeric` or :obj:`Tuple(numeric)` or :obj:`List[numeric]`): The amount for downscaling or upscaling the element.

        Returns:
            :obj:`BaseCoordElement`: The scaled BaseCoordElement of the same shape-specific class.
        """

        pass

    #######################################################################
    ################################# MISC ################################
    #######################################################################

    @abstractmethod
    def crop_image(self, image):
        """
        Crop the input image according to the coordinates of the element.

        Args:
            image (:obj:`Numpy array`): The array of the input image.

        Returns:
            :obj:`Numpy array`: The array of the cropped image.
        """

        pass

    #######################################################################
    ########################## Import and Export ##########################
    #######################################################################

    def to_dict(self) -> Dict[str, Any]:
        """
        Generate a dictionary representation of the current object:
            {
                "block_type": <"interval", "rectangle", "quadrilateral"> ,
                "non_empty_block_attr1": value1,
                ...
            }

        Features whose value is ``None`` are left out of the result.
        """

        data = {}
        for key in self._features:
            # Read each feature once; it may be a computed property.
            value = getattr(self, key)
            if value is not None:
                data[key] = value
        data["block_type"] = self._name
        return data

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> "BaseCoordElement":
        """Initialize an instance based on the dictionary representation

        Args:
            data (:obj:`dict`): The dictionary representation of the object

        Raises:
            AssertionError: If ``data["block_type"]`` differs from ``cls._name``.
            KeyError: If ``"block_type"`` or any name in ``cls._features`` is
                missing from ``data``.
        """

        # Raised explicitly instead of through an ``assert`` statement so the
        # check still runs under ``python -O``; the exception type is unchanged.
        if cls._name != data["block_type"]:
            raise AssertionError(f"Incompatible block types {data['block_type']}")

        return cls(**{f: data[f] for f in cls._features})
275
+
@@ -0,0 +1,26 @@
1
+ # Copyright 2021 The Layout Parser team. All rights reserved.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ class NotSupportedShapeError(Exception):
16
+ """For now (v0.2), if the created shape might be a polygon (shapes with more than 4 vertices),
17
+ layoutparser will raise NotSupportedShapeError. It is expected to be fixed in future versions.
18
+ See
19
+ :ref:`shape_operations:problems-related-to-the-quadrilateral-class`.
20
+ """
21
+
22
+
23
+ class InvalidShapeError(Exception):
24
+ """For shape operations like intersection or union, lp will raise the InvalidShapeError when
25
+ invalid shapes are created (e.g., intersecting a rectangle and an interval).
26
+ """