deepdoctection 0.34-py3-none-any.whl → 0.36-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- deepdoctection/__init__.py +7 -14
- deepdoctection/analyzer/__init__.py +1 -0
- deepdoctection/analyzer/_config.py +142 -0
- deepdoctection/analyzer/dd.py +11 -335
- deepdoctection/analyzer/factory.py +718 -0
- deepdoctection/configs/conf_dd_one.yaml +5 -0
- deepdoctection/datapoint/annotation.py +1 -1
- deepdoctection/datapoint/convert.py +6 -4
- deepdoctection/datapoint/image.py +16 -6
- deepdoctection/datapoint/view.py +91 -15
- deepdoctection/eval/cocometric.py +59 -13
- deepdoctection/extern/pdftext.py +96 -5
- deepdoctection/extern/tessocr.py +1 -0
- deepdoctection/mapper/match.py +4 -2
- deepdoctection/utils/env_info.py +30 -1
- deepdoctection/utils/file_utils.py +19 -0
- deepdoctection/utils/metacfg.py +12 -0
- deepdoctection/utils/pdf_utils.py +86 -3
- deepdoctection/utils/utils.py +39 -0
- deepdoctection/utils/viz.py +16 -13
- {deepdoctection-0.34.dist-info → deepdoctection-0.36.dist-info}/METADATA +126 -116
- {deepdoctection-0.34.dist-info → deepdoctection-0.36.dist-info}/RECORD +25 -23
- {deepdoctection-0.34.dist-info → deepdoctection-0.36.dist-info}/WHEEL +1 -1
- {deepdoctection-0.34.dist-info → deepdoctection-0.36.dist-info}/LICENSE +0 -0
- {deepdoctection-0.34.dist-info → deepdoctection-0.36.dist-info}/top_level.txt +0 -0
deepdoctection/configs/conf_dd_one.yaml
CHANGED

@@ -1,3 +1,4 @@
+USE_ROTATOR: False
 USE_LAYOUT: True
 USE_TABLE_SEGMENTATION: True
 TF:
@@ -97,3 +98,7 @@ TEXT_ORDERING:
   BROKEN_LINE_TOLERANCE: 0.003
   HEIGHT_TOLERANCE: 2.0
   PARAGRAPH_BREAK: 0.035
+USE_LAYOUT_LINK: False
+LAYOUT_LINK:
+  PARENTAL_CATEGORIES:
+  CHILD_CATEGORIES:
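Both new flags land in the analyzer's default configuration, so they can be toggled per run instead of editing the yaml file. A minimal sketch, assuming deepdoctection 0.36 and its default models are installed (the key=value strings follow the analyzer's usual `config_overwrite` convention):

    import deepdoctection as dd

    # Toggle the new 0.36 flags without touching conf_dd_one.yaml
    analyzer = dd.get_dd_analyzer(
        config_overwrite=["USE_ROTATOR=True", "USE_LAYOUT_LINK=True"]
    )
    df = analyzer.analyze(path="path/to/document.pdf")
    df.reset_state()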
deepdoctection/datapoint/annotation.py
CHANGED

@@ -527,5 +527,5 @@ class ContainerAnnotation(CategoryAnnotation):
     def from_dict(cls, **kwargs: AnnotationDict) -> ContainerAnnotation:
         container_ann = ann_from_dict(cls, **kwargs)
         value = kwargs.get("value", "")
-        container_ann.value = value if isinstance(value, str) else list(value)
+        container_ann.value = value if isinstance(value, (int, float, str)) else list(value)
         return container_ann
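The widened `isinstance` check means scalar numeric values now survive deserialization instead of being handed to `list()`. A hedged sketch of the effect; the category choice is illustrative only (`PageType.ANGLE` is one plausible numeric container, not something this diff prescribes):

    import deepdoctection as dd

    ann = dd.ContainerAnnotation(category_name=dd.PageType.ANGLE, value=90)
    restored = dd.ContainerAnnotation.from_dict(**ann.as_dict())
    assert restored.value == 90  # 0.34 would have attempted list(90) and raised a TypeError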
deepdoctection/datapoint/convert.py
CHANGED

@@ -143,11 +143,13 @@ def convert_pdf_bytes_to_np_array(pdf_bytes: bytes, dpi: Optional[int] = None) -
     return np_array.astype(uint8)


-def convert_pdf_bytes_to_np_array_v2(pdf_bytes: bytes, dpi: Optional[int] = None) -> PixelValues:
+def convert_pdf_bytes_to_np_array_v2(pdf_bytes: bytes, dpi: Optional[int] = 200) -> PixelValues:
     """
-    Converts a pdf passed as bytes into a numpy array.
-
-
+    Converts a pdf passed as bytes into a numpy array. We use poppler or pdfium to convert the pdf to an image.
+    If both are available you can steer the selection of the render engine with environment variables:
+
+    USE_DD_POPPLER: Set to 1, "TRUE", "True" to use poppler
+    USE_DD_PDFIUM: Set to 1, "TRUE", "True" to use pdfium

     :param pdf_bytes: A pdf as bytes object. A byte representation of a pdf file can be generated e.g. with
         `utils.fs.load_bytes_from_pdf_file`
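The engine toggle is read from the environment, so setting it before the import is the safe order. A minimal sketch, assuming both poppler and pypdfium2 are installed:

    import os

    os.environ["USE_DD_PDFIUM"] = "True"  # or: os.environ["USE_DD_POPPLER"] = "True"

    from deepdoctection.datapoint.convert import convert_pdf_bytes_to_np_array_v2
    from deepdoctection.utils.fs import load_bytes_from_pdf_file

    pdf_bytes = load_bytes_from_pdf_file("path/to/document.pdf")
    np_array = convert_pdf_bytes_to_np_array_v2(pdf_bytes, dpi=200)  # dpi now defaults to 200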
deepdoctection/datapoint/image.py
CHANGED

@@ -23,7 +23,7 @@ from __future__ import annotations
 import json
 from collections import defaultdict
 from dataclasses import dataclass, field
-from os import environ
+from os import environ, fspath
 from pathlib import Path
 from typing import Any, Optional, Sequence, Union, no_type_check

@@ -412,13 +412,22 @@ class Image:
         img_dict["_image"] = None
         return img_dict

+    def as_json(self) -> str:
+        """
+        Returns the full image dataclass as json string.
+
+        :return: A json string.
+        """
+
+        return json.dumps(self.as_dict(), indent=4)
+
     @staticmethod
     def remove_keys() -> list[str]:
         """
         A list of attributes to suspend from as_dict creation.
         """

-        return ["_image", "_annotation_ids"]
+        return ["_image", "_annotation_ids", "_category_name"]

     def define_annotation_id(self, annotation: Annotation) -> str:
         """
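A hedged usage sketch for the new `as_json` convenience wrapper, which simply serializes `as_dict()` with `indent=4`; `image` is a stand-in for any populated `Image` instance (hypothetical variable, not part of the diff):

    # image: a populated deepdoctection Image instance (hypothetical variable)
    json_str = image.as_json()  # equivalent to json.dumps(image.as_dict(), indent=4)
    with open(f"{image.image_id}.json", "w", encoding="utf-8") as fh:
        fh.write(json_str)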
@@ -443,7 +452,8 @@ class Image:

     Calls `List.remove`. Make sure, the element is in the list for otherwise a ValueError will be raised.

-        :param
+        :param annotation_ids: The annotation to remove
+        :param service_ids: The service id to remove
     """
     ann_id_to_annotation_maps = self.get_annotation_id_to_annotation_maps()

@@ -703,13 +713,13 @@ class Image:
         path = path / self.image_id
     suffix = path.suffix
     if suffix:
-        path_json = path
+        path_json = fspath(path).replace(suffix, ".json")
     else:
-        path_json = path
+        path_json = fspath(path) + ".json"
     if highest_hierarchy_only:
         self.remove_image_from_lower_hierachy()
     export_dict = self.as_dict()
-    export_dict["location"] =
+    export_dict["location"] = fspath(export_dict["location"])
     if not image_to_json:
         export_dict["_image"] = None
     if dry:
deepdoctection/datapoint/view.py
CHANGED

@@ -25,6 +25,7 @@ from copy import copy
 from typing import Any, Mapping, Optional, Sequence, Type, TypedDict, Union, no_type_check

 import numpy as np
+from typing_extensions import LiteralString

 from ..utils.error import AnnotationError, ImageError
 from ..utils.logger import LoggingRecord, logger
@@ -40,10 +41,12 @@ from ..utils.settings import (
     WordType,
     get_type,
 )
+from ..utils.transform import ResizeTransform
 from ..utils.types import HTML, AnnotationDict, Chunks, ImageDict, PathLikeOrStr, PixelValues, Text_, csv
 from ..utils.viz import draw_boxes, interactive_imshow, viz_handler
 from .annotation import CategoryAnnotation, ContainerAnnotation, ImageAnnotation, ann_from_dict
 from .box import BoundingBox, crop_box_from_image
+from .convert import box_to_point4, point4_to_box
 from .image import Image


@@ -101,7 +104,7 @@ class ImageAnnotationBaseView(ImageAnnotation):
             return np_image
         raise AnnotationError(f"base_page.image is None for {self.annotation_id}")

-    def __getattr__(self, item: str) -> Optional[Union[str, int, list[str]]]:
+    def __getattr__(self, item: str) -> Optional[Union[str, int, list[str], list[ImageAnnotationBaseView]]]:
         """
         Get attributes defined by registered `self.get_attribute_names()` in a multi step process:

@@ -126,6 +129,9 @@ class ImageAnnotationBaseView(ImageAnnotation):
             if isinstance(sub_cat, ContainerAnnotation):
                 return sub_cat.value
             return sub_cat.category_id
+        if item in self.relationships:
+            relationship_ids = self.get_relationship(get_type(item))
+            return self.base_page.get_annotation(annotation_ids=relationship_ids)
         if self.image is not None:
             if item in self.image.summary.sub_categories:
                 sub_cat = self.get_summary(get_type(item))
@@ -165,7 +171,11 @@ class Word(ImageAnnotationBaseView):
     """

     def get_attribute_names(self) -> set[str]:
-        return
+        return (
+            set(WordType)
+            .union(super().get_attribute_names())
+            .union({Relationships.READING_ORDER, Relationships.LAYOUT_LINK})
+        )


 class Layout(ImageAnnotationBaseView):
@@ -246,7 +256,11 @@ class Layout(ImageAnnotationBaseView):
     }

     def get_attribute_names(self) -> set[str]:
-        return
+        return (
+            {"words", "text"}
+            .union(super().get_attribute_names())
+            .union({Relationships.READING_ORDER, Relationships.LAYOUT_LINK})
+        )

     def __len__(self) -> int:
         """len of text counted by number of characters"""
@@ -433,8 +447,8 @@ class ImageDefaults(TypedDict):
     """ImageDefaults"""

     text_container: LayoutType
-    floating_text_block_categories: tuple[LayoutType, ...]
-    text_block_categories: tuple[LayoutType, ...]
+    floating_text_block_categories: tuple[Union[LayoutType, CellType], ...]
+    text_block_categories: tuple[Union[LayoutType, CellType], ...]


 IMAGE_DEFAULTS: ImageDefaults = {
@@ -448,9 +462,13 @@ IMAGE_DEFAULTS: ImageDefaults = {
     "text_block_categories": (
         LayoutType.TEXT,
         LayoutType.TITLE,
-        LayoutType.FIGURE,
         LayoutType.LIST,
         LayoutType.CELL,
+        LayoutType.FIGURE,
+        CellType.COLUMN_HEADER,
+        CellType.PROJECTED_ROW_HEADER,
+        CellType.SPANNING,
+        CellType.ROW_HEADER,
     ),
 }

@@ -509,6 +527,9 @@ class Page(Image):
         "location",
         "document_id",
         "page_number",
+        "angle",
+        "figures",
+        "residual_layouts",
     }
     include_residual_text_container: bool = True

@@ -607,6 +628,41 @@ class Page(Image):
         """
         return self.get_annotation(category_names=LayoutType.TABLE)

+    @property
+    def figures(self) -> list[ImageAnnotationBaseView]:
+        """
+        A list of figures.
+        """
+        return self.get_annotation(category_names=LayoutType.FIGURE)
+
+    @property
+    def residual_layouts(self) -> list[ImageAnnotationBaseView]:
+        """
+        A list of all residual layouts. Residual layouts are all layouts that are
+        - not floating text blocks,
+        - not text containers,
+        - not tables,
+        - not figures,
+        - not cells,
+        - not rows,
+        - not columns
+        """
+        return self.get_annotation(category_names=self._get_residual_layout())
+
+    def _get_residual_layout(self) -> list[LiteralString]:
+        layouts = copy(list(self.floating_text_block_categories))
+        layouts.extend(
+            [
+                LayoutType.TABLE,
+                LayoutType.FIGURE,
+                self.text_container,
+                LayoutType.CELL,
+                LayoutType.ROW,
+                LayoutType.COLUMN,
+            ]
+        )
+        return [layout for layout in LayoutType if layout not in layouts]
+
     @classmethod
     def from_image(
         cls,
@@ -800,12 +856,15 @@ class Page(Image):
         self,
         show_tables: bool = True,
         show_layouts: bool = True,
+        show_figures: bool = False,
+        show_residual_layouts: bool = False,
         show_cells: bool = True,
         show_table_structure: bool = True,
         show_words: bool = False,
         show_token_class: bool = True,
         ignore_default_token_class: bool = False,
         interactive: bool = False,
+        scaled_width: int = 600,
         **debug_kwargs: str,
     ) -> Optional[PixelValues]:
         """
@@ -826,12 +885,14 @@ class Page(Image):

         :param show_tables: Will display all tables boxes as well as cells, rows and columns
         :param show_layouts: Will display all other layout components.
+        :param show_figures: Will display all figures
         :param show_cells: Will display cells within tables. (Only available if `show_tables=True`)
         :param show_table_structure: Will display rows and columns
         :param show_words: Will display bounding boxes around words labeled with token class and bio tag (experimental)
         :param show_token_class: Will display token class instead of token tags (i.e. token classes with tags)
         :param interactive: If set to True will open an interactive image, otherwise it will return a numpy array that
             can be displayed differently.
+        :param scaled_width: Width of the image to display
         :param ignore_default_token_class: Will ignore displaying word bounding boxes with default or None token class
             label
         :return: If `interactive=False` will return a numpy array.
@@ -857,6 +918,11 @@ class Page(Image):
                 box_stack.append(item.bbox)
                 category_names_list.append(item.category_name.value)

+        if show_figures and not debug_kwargs:
+            for item in self.figures:
+                box_stack.append(item.bbox)
+                category_names_list.append(item.category_name.value)
+
         if show_tables and not debug_kwargs:
             for table in self.tables:
                 box_stack.append(table.bbox)
@@ -913,24 +979,34 @@ class Page(Image):
             else:
                 category_names_list.append(word.token_tag.value if word.token_tag is not None else None)

+        if show_residual_layouts and not debug_kwargs:
+            for item in self.residual_layouts:
+                box_stack.append(item.bbox)
+                category_names_list.append(item.category_name.value)
+
         if self.image is not None:
+            scale_fx = scaled_width / self.width
+            scaled_height = int(self.height * scale_fx)
+            img = viz_handler.resize(self.image, scaled_width, scaled_height, "VIZ")
+
             if box_stack:
                 boxes = np.vstack(box_stack)
+                boxes = box_to_point4(boxes)
+                resizer = ResizeTransform(self.height, self.width, scaled_height, scaled_width, "VIZ")
+                boxes = resizer.apply_coords(boxes)
+                boxes = point4_to_box(boxes)
                 if show_words:
                     img = draw_boxes(
-
-                        boxes,
-                        category_names_list,
+                        np_image=img,
+                        boxes=boxes,
+                        category_names_list=category_names_list,
                         font_scale=1.0,
                         rectangle_thickness=4,
                     )
                 else:
-                    img = draw_boxes(
-
-
-                    img = viz_handler.resize(img, scaled_width, scaled_height, "VIZ")
-                else:
-                    img = self.image
+                    img = draw_boxes(
+                        np_image=img, boxes=boxes, category_names_list=category_names_list, show_palette=False
+                    )

         if interactive:
             interactive_imshow(img)
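Taken together, the viz changes resize the page first and then transform the boxes into the scaled coordinate system via `box_to_point4`/`ResizeTransform`/`point4_to_box`. A hedged sketch of the extended API, assuming a default analyzer run (matplotlib is only used here for display):

    import deepdoctection as dd
    import matplotlib.pyplot as plt

    analyzer = dd.get_dd_analyzer()
    df = analyzer.analyze(path="path/to/document.pdf")
    df.reset_state()
    page = next(iter(df))

    print([f.category_name for f in page.figures])           # new figures property
    print([r.category_name for r in page.residual_layouts])  # new residual_layouts property

    np_img = page.viz(show_figures=True, show_residual_layouts=True, scaled_width=800)
    plt.imshow(np_img)
    plt.show()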
deepdoctection/eval/cocometric.py
CHANGED

@@ -71,8 +71,8 @@ https://github.com/cocodataset/cocoapi/blob/master/PythonAPI/pycocotools/cocoeva


 def _summarize(  # type: ignore
-    self, ap: int = 1, iouThr: float = 0.9, areaRng: str = "all", maxDets: int = 100
-) -> float:
+    self, ap: int = 1, iouThr: float = 0.9, areaRng: str = "all", maxDets: int = 100, per_category: bool = False
+) -> Union[float, list[float]]:
     # pylint: disable=C0103
     p = self.params
     iStr = " {:<18} {} @[ IoU={:<9} | area={:>6s} | maxDets={:>3d} ] = {:0.3f}"
@@ -86,6 +86,36 @@ def _summarize(  # type: ignore

     aind = [i for i, aRng in enumerate(p.areaRngLbl) if aRng == areaRng]
     mind = [i for i, mDet in enumerate(p.maxDets) if mDet == maxDets]
+    if per_category:
+        if ap == 1:
+            s = self.eval["precision"]
+            num_classes = s.shape[2]
+            results_per_class = []
+            for idx in range(num_classes):
+                if iouThr is not None:
+                    s = self.eval["precision"]
+                    t = np.where(iouThr == p.iouThrs)[0]
+                    s = s[t]
+                precision = s[:, :, idx, aind, mind]
+                precision = precision[precision > -1]
+                res = np.mean(precision) if precision.size else float("nan")
+                results_per_class.append(float(res))
+                print(f"Precision for class {idx+1}: @[ IoU={iouStr} | area={areaRng} | maxDets={maxDets} ] = {res}")
+        else:
+            s = self.eval["recall"]
+            num_classes = s.shape[1]
+            results_per_class = []
+            for idx in range(num_classes):
+                if iouThr is not None:
+                    s = self.eval["recall"]
+                    t = np.where(iouThr == p.iouThrs)[0]
+                    s = s[t]
+                recall = s[:, idx, aind, mind]
+                recall = recall[recall > -1]
+                res = np.mean(recall) if recall.size else float("nan")
+                results_per_class.append(float(res))
+                print(f"Recall for class {idx+1}: @[ IoU={iouStr} | area={areaRng} | maxDets={maxDets} ] = {res}")
+        return results_per_class
     if ap == 1:
         # dimension of precision: [TxRxKxAxM]
         s = self.eval["precision"]
@@ -124,6 +154,7 @@ class CocoMetric(MetricBase):
     mapper = image_to_coco
     _f1_score = None
     _f1_iou = None
+    _per_category = False
     _params: dict[str, Union[list[int], list[list[int]]]] = {}

     @classmethod
@@ -176,18 +207,28 @@ class CocoMetric(MetricBase):

         if cls._f1_score:
             summary_bbox = [
-                metric.summarize_f1(1, cls._f1_iou, maxDets=metric.params.maxDets[2]),
-                metric.summarize_f1(0, cls._f1_iou, maxDets=metric.params.maxDets[2]),
+                metric.summarize_f1(1, cls._f1_iou, maxDets=metric.params.maxDets[2], per_category=cls._per_category),
+                metric.summarize_f1(0, cls._f1_iou, maxDets=metric.params.maxDets[2], per_category=cls._per_category),
             ]
         else:
             metric.summarize()
             summary_bbox = metric.stats

         results = []
-
+
+        default_parameters = cls.get_summary_default_parameters()
+        if cls._per_category:
+            default_parameters = default_parameters * len(summary_bbox[0])
+            summary_bbox = [item for pair in zip(*summary_bbox) for item in pair]
+        val = 0
+        for idx, (params, value) in enumerate(zip(default_parameters, summary_bbox)):
             params = copy(params)
             params["mode"] = "bbox"
             params["val"] = value
+            if cls._per_category:
+                if idx % 2 == 0:
+                    val += 1
+                params["category_id"] = val
             results.append(params)

         return results
@@ -201,15 +242,16 @@ class CocoMetric(MetricBase):
         area range and maximum detections.
         """
         if cls._f1_score:
+            for el, idx in zip(_F1_DEFAULTS, [2, 2]):
+                if cls._params:
+                    if cls._params.get("maxDets") is not None:
+                        el["maxDets"] = cls._params["maxDets"][idx]
+                el["iouThr"] = cls._f1_iou
+            return _F1_DEFAULTS
+
+        for el, idx in zip(_COCOEVAL_DEFAULTS, _MAX_DET_INDEX):
             if cls._params:
                 if cls._params.get("maxDets") is not None:
-                for el, idx in zip(_F1_DEFAULTS, [2, 2]):
-                    el["maxDets"] = cls._params["maxDets"][idx]
-                el["iouThr"] = cls._f1_iou
-                return _F1_DEFAULTS
-        if cls._params:
-            if cls._params.get("maxDets") is not None:
-                for el, idx in zip(_COCOEVAL_DEFAULTS, _MAX_DET_INDEX):
                     el["maxDets"] = cls._params["maxDets"][idx]
         return _COCOEVAL_DEFAULTS

@@ -220,13 +262,16 @@ class CocoMetric(MetricBase):
         area_range: Optional[list[list[int]]] = None,
         f1_score: bool = False,
         f1_iou: float = 0.9,
+        per_category: bool = False,
     ) -> None:
         """
         Setting params for different coco metric modes.

         :param max_detections: The maximum number of detections to consider
         :param area_range: The area range to classify objects as "all", "small", "medium" and "large"
-        :param f1_score: Will use f1 score setting with default iouThr 0.9
+        :param f1_score: Will use f1 score setting with default iouThr 0.9. To be more precise it does not calculate
+            the f1 score but the precision and recall for a given iou threshold. Use the harmonic mean to
+            get the ultimate f1 score.
         :param f1_iou: Use with f1_score True and reset the f1 iou threshold
         """
         if max_detections is not None:
@@ -238,6 +283,7 @@ class CocoMetric(MetricBase):

         cls._f1_score = f1_score
         cls._f1_iou = f1_iou
+        cls._per_category = per_category

     @classmethod
     def get_requirements(cls) -> list[Requirement]:
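As the amended docstring stresses, f1_score mode reports per-threshold precision and recall rather than F1 itself; with per_category=True you get one such pair per class, each tagged with a "category_id". A hedged sketch of configuring the metric and folding a pair into F1 (the helper is hypothetical, not part of the diff):

    from deepdoctection.eval import CocoMetric

    CocoMetric.set_params(f1_score=True, f1_iou=0.9, per_category=True)
    # ...then run an Evaluator with CocoMetric as usual; each class yields a
    # (precision, recall) pair in the result dicts.

    def f1_from_pair(precision: float, recall: float) -> float:
        """Harmonic mean, as the docstring suggests (hypothetical helper)."""
        return 2 * precision * recall / (precision + recall) if precision + recall else 0.0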
deepdoctection/extern/pdftext.py
CHANGED

@@ -24,21 +24,25 @@ from typing import Optional
 from lazy_imports import try_import

 from ..utils.context import save_tmp_file
-from ..utils.file_utils import get_pdfplumber_requirement
+from ..utils.file_utils import get_pdfplumber_requirement, get_pypdfium2_requirement
 from ..utils.settings import LayoutType, ObjectTypes
 from ..utils.types import Requirement
 from .base import DetectionResult, ModelCategories, PdfMiner

-with try_import() as
+with try_import() as pdfplumber_import_guard:
     from pdfplumber.pdf import PDF, Page

+with try_import() as pypdfmium_import_guard:
+    import pypdfium2.raw as pypdfium_c
+    from pypdfium2 import PdfDocument

-
+
+def _to_detect_result(word: dict[str, str], class_name: ObjectTypes) -> DetectionResult:
     return DetectionResult(
         box=[float(word["x0"]), float(word["top"]), float(word["x1"]), float(word["bottom"])],
         class_id=1,
         text=word["text"],
-        class_name=
+        class_name=class_name,
     )

@@ -49,6 +53,7 @@ class PdfPlumberTextDetector(PdfMiner):

     pdf_plumber = PdfPlumberTextDetector()
     df = SerializerPdfDoc.load("path/to/document.pdf")
+    df.reset_state()

     for dp in df:
         detection_results = pdf_plumber.predict(dp["pdf_bytes"])
@@ -61,6 +66,8 @@ class PdfPlumberTextDetector(PdfMiner):
     pipe = DoctectionPipe([text_extract])

     df = pipe.analyze(path="path/to/document.pdf")
+    df.reset_state()
+
     for dp in df:
         ...

@@ -87,7 +94,7 @@ class PdfPlumberTextDetector(PdfMiner):
         self._page = PDF(fin).pages[0]
         self._pdf_bytes = pdf_bytes
         words = self._page.extract_words(x_tolerance=self.x_tolerance, y_tolerance=self.y_tolerance)
-        detect_results =
+        detect_results = [_to_detect_result(word, self.get_category_names()[0]) for word in words]
         return detect_results

     @classmethod
@@ -113,3 +120,87 @@ class PdfPlumberTextDetector(PdfMiner):

     def get_category_names(self) -> tuple[ObjectTypes, ...]:
         return self.categories.get_categories(as_dict=False)
+
+
+class Pdfmium2TextDetector(PdfMiner):
+    """
+    Text miner based on the pypdfium2 engine. It will return text on text line level and not on word level
+
+        pdfmium2 = Pdfmium2TextDetector()
+        df = SerializerPdfDoc.load("path/to/document.pdf")
+        df.reset_state()
+
+        for dp in df:
+            detection_results = pdfmium2.predict(dp["pdf_bytes"])
+
+    To use it in a more integrated way:
+
+        pdfmium2 = Pdfmium2TextDetector()
+        text_extract = TextExtractionService(pdfmium2)
+
+        pipe = DoctectionPipe([text_extract])
+
+        df = pipe.analyze(path="path/to/document.pdf")
+        df.reset_state()
+        for dp in df:
+            ...
+
+    """
+
+    def __init__(self) -> None:
+        self.name = "Pdfmium"
+        self.model_id = self.get_model_id()
+        self.categories = ModelCategories(init_categories={1: LayoutType.LINE})
+        self._page: Optional[Page] = None
+
+    def predict(self, pdf_bytes: bytes) -> list[DetectionResult]:
+        """
+        Call pypdfium2 and returns detected text as detection results
+
+        :param pdf_bytes: bytes of a single pdf page
+        :return: A list of DetectionResult
+        """
+
+        pdf = PdfDocument(pdf_bytes)
+        page = pdf.get_page(0)
+        text = page.get_textpage()
+        words = []
+        height = page.get_height()
+        for obj in page.get_objects((pypdfium_c.FPDF_PAGEOBJ_TEXT,)):
+            box = obj.get_pos()
+            if all(x > 0 for x in box):
+                words.append(
+                    {
+                        "text": text.get_text_bounded(*box),
+                        "x0": box[0],
+                        "x1": box[2],
+                        "top": height - box[3],
+                        "bottom": height - box[1],
+                    }
+                )
+        detect_results = [_to_detect_result(word, self.get_category_names()[0]) for word in words]
+        return detect_results
+
+    @classmethod
+    def get_requirements(cls) -> list[Requirement]:
+        return [get_pypdfium2_requirement()]
+
+    def get_width_height(self, pdf_bytes: bytes) -> tuple[float, float]:
+        """
+        Get the width and height of the full page
+        :param pdf_bytes: pdf_bytes generating the pdf
+        :return: width and height
+        """
+
+        if self._pdf_bytes == pdf_bytes and self._page is not None:
+            return self._page.bbox[2], self._page.bbox[3]  # pylint: disable=E1101
+        # if the pdf bytes is not equal to the cached pdf, will recalculate values
+        pdf = PdfDocument(pdf_bytes)
+        self._page = pdf.get_page(0)
+        self._pdf_bytes = pdf_bytes
+        if self._page is not None:
+            return self._page.get_width(), self._page.get_height()  # type: ignore
+        raise ValueError("Page not found")
+
+    def get_category_names(self) -> tuple[ObjectTypes, ...]:
+        return self.categories.get_categories(as_dict=False)
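pypdfium2 reports object positions in PDF coordinates with the origin at the bottom-left, which is why predict flips the vertical values via height - y before building word dicts. A hedged sketch of standalone use, assuming pypdfium2 is installed and the file path is illustrative:

    from deepdoctection.extern.pdftext import Pdfmium2TextDetector
    from deepdoctection.utils.fs import load_bytes_from_pdf_file

    miner = Pdfmium2TextDetector()
    pdf_bytes = load_bytes_from_pdf_file("path/to/single_page.pdf")  # one page per predict call
    for detection in miner.predict(pdf_bytes):
        print(detection.class_name, detection.box, detection.text)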
deepdoctection/extern/tessocr.py
CHANGED

@@ -421,6 +421,7 @@ class TesseractRotationTransformer(ImageTransformer):
     def __init__(self) -> None:
         self.name = fspath(_TESS_PATH) + "-rotation"
         self.categories = ModelCategories(init_categories={1: PageType.ANGLE})
+        self.model_id = self.get_model_id()

     def transform(self, np_img: PixelValues, specification: DetectionResult) -> PixelValues:
         """
deepdoctection/mapper/match.py
CHANGED

@@ -193,5 +193,7 @@ def match_anns_by_distance(
     child_anns = dp.get_annotation(annotation_ids=child_ann_ids, category_names=child_ann_category_names)
     child_centers = [block.get_bounding_box(dp.image_id).center for block in child_anns]
     parent_centers = [block.get_bounding_box(dp.image_id).center for block in parent_anns]
-
-
+    if child_centers and parent_centers:
+        child_indices = distance.cdist(parent_centers, child_centers).argmin(axis=1)
+        return [(parent_anns[i], child_anns[j]) for i, j in enumerate(child_indices)]
+    return []