deepdoctection 0.39.7__py3-none-any.whl → 0.40.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of deepdoctection might be problematic. Click here for more details.
- deepdoctection/__init__.py +2 -1
- deepdoctection/analyzer/_config.py +0 -1
- deepdoctection/analyzer/factory.py +34 -13
- deepdoctection/datapoint/image.py +5 -5
- deepdoctection/datapoint/view.py +5 -5
- deepdoctection/mapper/match.py +28 -8
- deepdoctection/pipe/anngen.py +1 -25
- deepdoctection/pipe/common.py +91 -38
- deepdoctection/pipe/layout.py +26 -13
- deepdoctection/pipe/order.py +6 -22
- deepdoctection/pipe/segment.py +36 -43
- deepdoctection/pipe/sub_layout.py +1 -10
- deepdoctection/pipe/text.py +5 -14
- deepdoctection/train/hf_detr_train.py +1 -0
- {deepdoctection-0.39.7.dist-info → deepdoctection-0.40.0.dist-info}/METADATA +1 -1
- {deepdoctection-0.39.7.dist-info → deepdoctection-0.40.0.dist-info}/RECORD +19 -19
- {deepdoctection-0.39.7.dist-info → deepdoctection-0.40.0.dist-info}/WHEEL +1 -1
- {deepdoctection-0.39.7.dist-info → deepdoctection-0.40.0.dist-info}/licenses/LICENSE +0 -0
- {deepdoctection-0.39.7.dist-info → deepdoctection-0.40.0.dist-info}/top_level.txt +0 -0
deepdoctection/__init__.py
CHANGED
|
@@ -25,7 +25,7 @@ from .utils.logger import LoggingRecord, logger
|
|
|
25
25
|
|
|
26
26
|
# pylint: enable=wrong-import-position
|
|
27
27
|
|
|
28
|
-
__version__ = "0.39.7"
|
|
28
|
+
__version__ = "0.40.0"
|
|
29
29
|
|
|
30
30
|
_IMPORT_STRUCTURE = {
|
|
31
31
|
"analyzer": ["config_sanity_checks", "get_dd_analyzer", "ServiceFactory"],
|
|
@@ -260,6 +260,7 @@ _IMPORT_STRUCTURE = {
|
|
|
260
260
|
"ImageCroppingService",
|
|
261
261
|
"IntersectionMatcher",
|
|
262
262
|
"NeighbourMatcher",
|
|
263
|
+
"FamilyCompound",
|
|
263
264
|
"MatchingService",
|
|
264
265
|
"PageParsingService",
|
|
265
266
|
"AnnotationNmsService",
|
|
deepdoctection/analyzer/_config.py
CHANGED
|
@@ -72,7 +72,6 @@ cfg.SEGMENTATION.THRESHOLD_COLS = 0.4
|
|
|
72
72
|
cfg.SEGMENTATION.FULL_TABLE_TILING = True
|
|
73
73
|
cfg.SEGMENTATION.REMOVE_IOU_THRESHOLD_ROWS = 0.001
|
|
74
74
|
cfg.SEGMENTATION.REMOVE_IOU_THRESHOLD_COLS = 0.001
|
|
75
|
-
cfg.SEGMENTATION.CELL_CATEGORY_ID = 12
|
|
76
75
|
cfg.SEGMENTATION.TABLE_NAME = LayoutType.TABLE
|
|
77
76
|
cfg.SEGMENTATION.PUBTABLES_CELL_NAMES = [
|
|
78
77
|
CellType.SPANNING,
|
|
deepdoctection/analyzer/factory.py
CHANGED
|
@@ -35,13 +35,14 @@ from ..extern.tpdetect import TPFrcnnDetector
|
|
|
35
35
|
from ..pipe.base import PipelineComponent
|
|
36
36
|
from ..pipe.common import (
|
|
37
37
|
AnnotationNmsService,
|
|
38
|
+
FamilyCompound,
|
|
38
39
|
IntersectionMatcher,
|
|
39
40
|
MatchingService,
|
|
40
41
|
NeighbourMatcher,
|
|
41
42
|
PageParsingService,
|
|
42
43
|
)
|
|
43
44
|
from ..pipe.doctectionpipe import DoctectionPipe
|
|
44
|
-
from ..pipe.layout import ImageLayoutService
|
|
45
|
+
from ..pipe.layout import ImageLayoutService, skip_if_category_or_service_extracted
|
|
45
46
|
from ..pipe.order import TextOrderService
|
|
46
47
|
from ..pipe.refine import TableSegmentationRefinementService
|
|
47
48
|
from ..pipe.segment import PubtablesSegmentationService, TableSegmentationService
|
|
@@ -284,7 +285,6 @@ class ServiceFactory:
|
|
|
284
285
|
return SubImageLayoutService(
|
|
285
286
|
sub_image_detector=detector,
|
|
286
287
|
sub_image_names=[LayoutType.TABLE, LayoutType.TABLE_ROTATED],
|
|
287
|
-
category_id_mapping=None,
|
|
288
288
|
detect_result_generator=detect_result_generator,
|
|
289
289
|
padder=padder,
|
|
290
290
|
)
|
|
@@ -405,7 +405,6 @@ class ServiceFactory:
|
|
|
405
405
|
tile_table_with_items=config.SEGMENTATION.FULL_TABLE_TILING,
|
|
406
406
|
remove_iou_threshold_rows=config.SEGMENTATION.REMOVE_IOU_THRESHOLD_ROWS,
|
|
407
407
|
remove_iou_threshold_cols=config.SEGMENTATION.REMOVE_IOU_THRESHOLD_COLS,
|
|
408
|
-
cell_class_id=config.SEGMENTATION.CELL_CATEGORY_ID,
|
|
409
408
|
table_name=config.SEGMENTATION.TABLE_NAME,
|
|
410
409
|
cell_names=config.SEGMENTATION.PUBTABLES_CELL_NAMES,
|
|
411
410
|
spanning_cell_names=config.SEGMENTATION.PUBTABLES_SPANNING_CELL_NAMES,
|
|
@@ -516,6 +515,15 @@ class ServiceFactory:
|
|
|
516
515
|
"""
|
|
517
516
|
return ServiceFactory._build_pdf_miner_text_service(detector)
|
|
518
517
|
|
|
518
|
+
@staticmethod
|
|
519
|
+
def _build_doctr_word_detector_service(detector: DoctrTextlineDetector) -> ImageLayoutService:
|
|
520
|
+
"""Building a Doctr word detector service
|
|
521
|
+
|
|
522
|
+
:param detector: DoctrTextlineDetector
|
|
523
|
+
:return: ImageLayoutService
|
|
524
|
+
"""
|
|
525
|
+
return ImageLayoutService(layout_detector=detector, to_image=True, crop_image=True)
|
|
526
|
+
|
|
519
527
|
@staticmethod
|
|
520
528
|
def build_doctr_word_detector_service(detector: DoctrTextlineDetector) -> ImageLayoutService:
|
|
521
529
|
"""Building a Doctr word detector service
|
|
@@ -523,9 +531,7 @@ class ServiceFactory:
|
|
|
523
531
|
:param detector: DoctrTextlineDetector
|
|
524
532
|
:return: ImageLayoutService
|
|
525
533
|
"""
|
|
526
|
-
return ImageLayoutService(
|
|
527
|
-
layout_detector=detector, to_image=True, crop_image=True, skip_if_layout_extracted=True
|
|
528
|
-
)
|
|
534
|
+
return ServiceFactory._build_doctr_word_detector_service(detector)
|
|
529
535
|
|
|
530
536
|
@staticmethod
|
|
531
537
|
def _build_text_extraction_service(
|
|
@@ -539,7 +545,6 @@ class ServiceFactory:
|
|
|
539
545
|
"""
|
|
540
546
|
return TextExtractionService(
|
|
541
547
|
detector,
|
|
542
|
-
skip_if_text_extracted=config.USE_PDF_MINER,
|
|
543
548
|
extract_from_roi=config.TEXT_CONTAINER if config.OCR.USE_DOCTR else None,
|
|
544
549
|
)
|
|
545
550
|
|
|
@@ -567,11 +572,16 @@ class ServiceFactory:
|
|
|
567
572
|
threshold=config.WORD_MATCHING.THRESHOLD,
|
|
568
573
|
max_parent_only=config.WORD_MATCHING.MAX_PARENT_ONLY,
|
|
569
574
|
)
|
|
575
|
+
family_compounds = [
|
|
576
|
+
FamilyCompound(
|
|
577
|
+
parent_categories=config.WORD_MATCHING.PARENTAL_CATEGORIES,
|
|
578
|
+
child_categories=config.TEXT_CONTAINER,
|
|
579
|
+
relationship_key=Relationships.CHILD,
|
|
580
|
+
)
|
|
581
|
+
]
|
|
570
582
|
return MatchingService(
|
|
571
|
-
|
|
572
|
-
child_categories=config.TEXT_CONTAINER,
|
|
583
|
+
family_compounds=family_compounds,
|
|
573
584
|
matcher=matcher,
|
|
574
|
-
relationship_key=Relationships.CHILD,
|
|
575
585
|
)
|
|
576
586
|
|
|
577
587
|
@staticmethod
|
|
@@ -591,11 +601,16 @@ class ServiceFactory:
|
|
|
591
601
|
:return: MatchingService
|
|
592
602
|
"""
|
|
593
603
|
neighbor_matcher = NeighbourMatcher()
|
|
604
|
+
family_compounds = [
|
|
605
|
+
FamilyCompound(
|
|
606
|
+
parent_categories=config.LAYOUT_LINK.PARENTAL_CATEGORIES,
|
|
607
|
+
child_categories=config.LAYOUT_LINK.CHILD_CATEGORIES,
|
|
608
|
+
relationship_key=Relationships.LAYOUT_LINK,
|
|
609
|
+
)
|
|
610
|
+
]
|
|
594
611
|
return MatchingService(
|
|
595
|
-
|
|
596
|
-
child_categories=config.LAYOUT_LINK.CHILD_CATEGORIES,
|
|
612
|
+
family_compounds=family_compounds,
|
|
597
613
|
matcher=neighbor_matcher,
|
|
598
|
-
relationship_key=Relationships.LAYOUT_LINK,
|
|
599
614
|
)
|
|
600
615
|
|
|
601
616
|
@staticmethod
|
|
@@ -699,9 +714,11 @@ class ServiceFactory:
|
|
|
699
714
|
table_refinement_service = ServiceFactory.build_table_refinement_service(config)
|
|
700
715
|
pipe_component_list.append(table_refinement_service)
|
|
701
716
|
|
|
717
|
+
d_text_service_id = ""
|
|
702
718
|
if config.USE_PDF_MINER:
|
|
703
719
|
pdf_miner = ServiceFactory.build_pdf_text_detector(config)
|
|
704
720
|
d_text = ServiceFactory.build_pdf_miner_text_service(pdf_miner)
|
|
721
|
+
d_text_service_id = d_text.service_id
|
|
705
722
|
pipe_component_list.append(d_text)
|
|
706
723
|
|
|
707
724
|
# setup ocr
|
|
@@ -710,10 +727,14 @@ class ServiceFactory:
|
|
|
710
727
|
if config.OCR.USE_DOCTR:
|
|
711
728
|
word_detector = ServiceFactory.build_doctr_word_detector(config)
|
|
712
729
|
word_service = ServiceFactory.build_doctr_word_detector_service(word_detector)
|
|
730
|
+
word_service.set_inbound_filter(skip_if_category_or_service_extracted(service_ids=d_text_service_id))
|
|
713
731
|
pipe_component_list.append(word_service)
|
|
714
732
|
|
|
715
733
|
ocr_detector = ServiceFactory.build_ocr_detector(config)
|
|
716
734
|
text_extraction_service = ServiceFactory.build_text_extraction_service(config, ocr_detector)
|
|
735
|
+
text_extraction_service.set_inbound_filter(
|
|
736
|
+
skip_if_category_or_service_extracted(service_ids=d_text_service_id)
|
|
737
|
+
)
|
|
717
738
|
pipe_component_list.append(text_extraction_service)
|
|
718
739
|
|
|
719
740
|
if config.USE_PDF_MINER or config.USE_OCR:
|
|
deepdoctection/datapoint/image.py
CHANGED
|
@@ -342,7 +342,7 @@ class Image:
|
|
|
342
342
|
self,
|
|
343
343
|
category_names: Optional[Union[str, ObjectTypes, Sequence[Union[str, ObjectTypes]]]] = None,
|
|
344
344
|
annotation_ids: Optional[Union[str, Sequence[str]]] = None,
|
|
345
|
-
|
|
345
|
+
service_ids: Optional[Union[str, Sequence[str]]] = None,
|
|
346
346
|
model_id: Optional[Union[str, Sequence[str]]] = None,
|
|
347
347
|
session_ids: Optional[Union[str, Sequence[str]]] = None,
|
|
348
348
|
ignore_inactive: bool = True,
|
|
@@ -356,7 +356,7 @@ class Image:
|
|
|
356
356
|
|
|
357
357
|
:param category_names: A single name or list of names
|
|
358
358
|
:param annotation_ids: A single id or list of ids
|
|
359
|
-
:param
|
|
359
|
+
:param service_ids: A single service name or list of service names
|
|
360
360
|
:param model_id: A single model name or list of model names
|
|
361
361
|
:param session_ids: A single session id or list of session ids
|
|
362
362
|
:param ignore_inactive: If set to `True` only active annotations are returned.
|
|
@@ -372,7 +372,7 @@ class Image:
|
|
|
372
372
|
)
|
|
373
373
|
|
|
374
374
|
ann_ids = [annotation_ids] if isinstance(annotation_ids, str) else annotation_ids
|
|
375
|
-
|
|
375
|
+
service_ids = [service_ids] if isinstance(service_ids, str) else service_ids
|
|
376
376
|
model_id = [model_id] if isinstance(model_id, str) else model_id
|
|
377
377
|
session_id = [session_ids] if isinstance(session_ids, str) else session_ids
|
|
378
378
|
|
|
@@ -387,8 +387,8 @@ class Image:
|
|
|
387
387
|
if ann_ids is not None:
|
|
388
388
|
anns = filter(lambda x: x.annotation_id in ann_ids, anns)
|
|
389
389
|
|
|
390
|
-
if
|
|
391
|
-
anns = filter(lambda x: x.service_id in
|
|
390
|
+
if service_ids is not None:
|
|
391
|
+
anns = filter(lambda x: x.service_id in service_ids, anns)
|
|
392
392
|
|
|
393
393
|
if model_id is not None:
|
|
394
394
|
anns = filter(lambda x: x.model_id in model_id, anns)
|
deepdoctection/datapoint/view.py
CHANGED
|
@@ -659,7 +659,7 @@ class Page(Image):
|
|
|
659
659
|
self,
|
|
660
660
|
category_names: Optional[Union[str, ObjectTypes, Sequence[Union[str, ObjectTypes]]]] = None,
|
|
661
661
|
annotation_ids: Optional[Union[str, Sequence[str]]] = None,
|
|
662
|
-
|
|
662
|
+
service_ids: Optional[Union[str, Sequence[str]]] = None,
|
|
663
663
|
model_id: Optional[Union[str, Sequence[str]]] = None,
|
|
664
664
|
session_ids: Optional[Union[str, Sequence[str]]] = None,
|
|
665
665
|
ignore_inactive: bool = True,
|
|
@@ -676,7 +676,7 @@ class Page(Image):
|
|
|
676
676
|
|
|
677
677
|
:param category_names: A single name or list of names
|
|
678
678
|
:param annotation_ids: A single id or list of ids
|
|
679
|
-
:param
|
|
679
|
+
:param service_ids: A single service name or list of service names
|
|
680
680
|
:param model_id: A single model name or list of model names
|
|
681
681
|
:param session_ids: A single session id or list of session ids
|
|
682
682
|
:param ignore_inactive: If set to `True` only active annotations are returned.
|
|
@@ -691,7 +691,7 @@ class Page(Image):
|
|
|
691
691
|
else tuple(get_type(cat_name) for cat_name in category_names)
|
|
692
692
|
)
|
|
693
693
|
ann_ids = [annotation_ids] if isinstance(annotation_ids, str) else annotation_ids
|
|
694
|
-
|
|
694
|
+
service_ids = [service_ids] if isinstance(service_ids, str) else service_ids
|
|
695
695
|
model_id = [model_id] if isinstance(model_id, str) else model_id
|
|
696
696
|
session_id = [session_ids] if isinstance(session_ids, str) else session_ids
|
|
697
697
|
|
|
@@ -706,8 +706,8 @@ class Page(Image):
|
|
|
706
706
|
if ann_ids is not None:
|
|
707
707
|
anns = filter(lambda x: x.annotation_id in ann_ids, anns)
|
|
708
708
|
|
|
709
|
-
if
|
|
710
|
-
anns = filter(lambda x: x.generating_service in
|
|
709
|
+
if service_ids is not None:
|
|
710
|
+
anns = filter(lambda x: x.generating_service in service_ids, anns)
|
|
711
711
|
|
|
712
712
|
if model_id is not None:
|
|
713
713
|
anns = filter(lambda x: x.generating_model in model_id, anns)
|
deepdoctection/mapper/match.py
CHANGED
|
@@ -34,13 +34,15 @@ from ..utils.settings import TypeOrStr
|
|
|
34
34
|
|
|
35
35
|
def match_anns_by_intersection(
|
|
36
36
|
dp: Image,
|
|
37
|
-
parent_ann_category_names: Union[TypeOrStr, Sequence[TypeOrStr]],
|
|
38
|
-
child_ann_category_names: Union[TypeOrStr, Sequence[TypeOrStr]],
|
|
39
37
|
matching_rule: Literal["iou", "ioa"],
|
|
40
38
|
threshold: float,
|
|
41
39
|
use_weighted_intersections: bool = False,
|
|
40
|
+
parent_ann_category_names: Optional[Union[TypeOrStr, Sequence[TypeOrStr]]] = None,
|
|
41
|
+
child_ann_category_names: Optional[Union[TypeOrStr, Sequence[TypeOrStr]]] = None,
|
|
42
42
|
parent_ann_ids: Optional[Union[Sequence[str], str]] = None,
|
|
43
43
|
child_ann_ids: Optional[Union[str, Sequence[str]]] = None,
|
|
44
|
+
parent_ann_service_ids: Optional[Union[str, Sequence[str]]] = None,
|
|
45
|
+
child_ann_service_ids: Optional[Union[str, Sequence[str]]] = None,
|
|
44
46
|
max_parent_only: bool = False,
|
|
45
47
|
) -> tuple[Any, Any, Sequence[ImageAnnotation], Sequence[ImageAnnotation]]:
|
|
46
48
|
"""
|
|
@@ -87,13 +89,19 @@ def match_anns_by_intersection(
|
|
|
87
89
|
dates which are not in the list.
|
|
88
90
|
:param child_ann_ids: Additional filter condition. If some ids are selected, it will ignore all other children
|
|
89
91
|
candidates which are not in the list.
|
|
92
|
+
:param parent_ann_service_ids: Additional filter condition. If some ids are selected, it will ignore all other
|
|
93
|
+
parent candidates which are not in the list.
|
|
94
|
+
:param child_ann_service_ids: Additional filter condition. If some ids are selected, it will ignore all other
|
|
95
|
+
children candidates which are not in the list.
|
|
90
96
|
:param max_parent_only: Will assign to each child at most one parent with maximum ioa
|
|
91
97
|
:return: child indices, parent indices (see Example), list of parent ids and list of children ids.
|
|
92
98
|
"""
|
|
93
99
|
|
|
94
100
|
assert matching_rule in ["iou", "ioa"], "matching rule must be either iou or ioa"
|
|
95
101
|
|
|
96
|
-
child_anns = dp.get_annotation(
|
|
102
|
+
child_anns = dp.get_annotation(
|
|
103
|
+
annotation_ids=child_ann_ids, category_names=child_ann_category_names, service_ids=child_ann_service_ids
|
|
104
|
+
)
|
|
97
105
|
child_ann_boxes = np.array(
|
|
98
106
|
[
|
|
99
107
|
ann.get_bounding_box(dp.image_id).transform(dp.width, dp.height, absolute_coords=True).to_list(mode="xyxy")
|
|
@@ -101,7 +109,9 @@ def match_anns_by_intersection(
|
|
|
101
109
|
]
|
|
102
110
|
)
|
|
103
111
|
|
|
104
|
-
parent_anns = dp.get_annotation(
|
|
112
|
+
parent_anns = dp.get_annotation(
|
|
113
|
+
annotation_ids=parent_ann_ids, category_names=parent_ann_category_names, service_ids=parent_ann_service_ids
|
|
114
|
+
)
|
|
105
115
|
parent_ann_boxes = np.array(
|
|
106
116
|
[
|
|
107
117
|
ann.get_bounding_box(dp.image_id).transform(dp.width, dp.height, absolute_coords=True).to_list(mode="xyxy")
|
|
@@ -147,10 +157,12 @@ def match_anns_by_intersection(
|
|
|
147
157
|
|
|
148
158
|
def match_anns_by_distance(
|
|
149
159
|
dp: Image,
|
|
150
|
-
parent_ann_category_names:
|
|
151
|
-
child_ann_category_names:
|
|
160
|
+
parent_ann_category_names: Optional[Union[TypeOrStr, Sequence[TypeOrStr]]]=None,
|
|
161
|
+
child_ann_category_names: Optional[Union[TypeOrStr, Sequence[TypeOrStr]]]=None,
|
|
152
162
|
parent_ann_ids: Optional[Union[Sequence[str], str]] = None,
|
|
153
163
|
child_ann_ids: Optional[Union[str, Sequence[str]]] = None,
|
|
164
|
+
parent_ann_service_ids: Optional[Union[str, Sequence[str]]] = None,
|
|
165
|
+
child_ann_service_ids: Optional[Union[str, Sequence[str]]] = None,
|
|
154
166
|
) -> list[tuple[ImageAnnotation, ImageAnnotation]]:
|
|
155
167
|
"""
|
|
156
168
|
Generates pairs of parent and child annotations by calculating the euclidean distance between the centers of the
|
|
@@ -164,11 +176,19 @@ def match_anns_by_distance(
|
|
|
164
176
|
dates which are not in the list.
|
|
165
177
|
:param child_ann_ids: Additional filter condition. If some ids are selected, it will ignore all other children
|
|
166
178
|
candidates which are not in the list.
|
|
179
|
+
:param parent_ann_service_ids: Additional filter condition. If some ids are selected, it will ignore all other
|
|
180
|
+
parent candidates which are not in the list.
|
|
181
|
+
:param child_ann_service_ids: Additional filter condition. If some ids are selected, it will ignore all other
|
|
182
|
+
children candidates which are not in the list.
|
|
167
183
|
:return:
|
|
168
184
|
"""
|
|
169
185
|
|
|
170
|
-
parent_anns = dp.get_annotation(
|
|
171
|
-
|
|
186
|
+
parent_anns = dp.get_annotation(
|
|
187
|
+
annotation_ids=parent_ann_ids, category_names=parent_ann_category_names, service_ids=parent_ann_service_ids
|
|
188
|
+
)
|
|
189
|
+
child_anns = dp.get_annotation(
|
|
190
|
+
annotation_ids=child_ann_ids, category_names=child_ann_category_names, service_ids=child_ann_service_ids
|
|
191
|
+
)
|
|
172
192
|
child_centers = [block.get_bounding_box(dp.image_id).center for block in child_anns]
|
|
173
193
|
parent_centers = [block.get_bounding_box(dp.image_id).center for block in parent_anns]
|
|
174
194
|
if child_centers and parent_centers:
|
deepdoctection/pipe/anngen.py
CHANGED
|
@@ -75,27 +75,6 @@ class DatapointManager:
|
|
|
75
75
|
"""
|
|
76
76
|
assert self.datapoint_is_passed, "Pass datapoint to DatapointManager before creating anns"
|
|
77
77
|
|
|
78
|
-
def maybe_map_category_id(self, category_id: Union[str, int]) -> int:
|
|
79
|
-
"""
|
|
80
|
-
Maps categories if a category id mapping is provided in `__init__`.
|
|
81
|
-
|
|
82
|
-
:param category_id: category id via integer or string.
|
|
83
|
-
:return: mapped category id
|
|
84
|
-
"""
|
|
85
|
-
if self.category_id_mapping is None:
|
|
86
|
-
return int(category_id)
|
|
87
|
-
return self.category_id_mapping[int(category_id)]
|
|
88
|
-
|
|
89
|
-
def set_category_id_mapping(self, category_id_mapping: Mapping[int, int]) -> None:
|
|
90
|
-
"""
|
|
91
|
-
In many cases the category ids sent back from a model have to be modified. Pass a mapping from model
|
|
92
|
-
category ids to target annotation category ids.
|
|
93
|
-
|
|
94
|
-
:param category_id_mapping: A mapping of model category ids (sent from DetectionResult) to category ids (saved
|
|
95
|
-
in annotations)
|
|
96
|
-
"""
|
|
97
|
-
self.category_id_mapping = category_id_mapping
|
|
98
|
-
|
|
99
78
|
def set_image_annotation(
|
|
100
79
|
self,
|
|
101
80
|
detect_result: DetectionResult,
|
|
@@ -127,13 +106,10 @@ class DatapointManager:
|
|
|
127
106
|
:return: the annotation_id of the generated image annotation
|
|
128
107
|
"""
|
|
129
108
|
self.assert_datapoint_passed()
|
|
130
|
-
if detect_result.class_id is None:
|
|
131
|
-
raise ValueError("class_id of detect_result cannot be None")
|
|
132
109
|
if not isinstance(detect_result.box, (list, np.ndarray)):
|
|
133
110
|
raise TypeError(
|
|
134
111
|
f"detect_result.box must be of type list or np.ndarray, but is of type {(type(detect_result.box))}"
|
|
135
112
|
)
|
|
136
|
-
detect_result.class_id = self.maybe_map_category_id(detect_result.class_id)
|
|
137
113
|
with MappingContextManager(
|
|
138
114
|
dp_name=self.datapoint.file_name, filter_level="annotation", detect_result=asdict(detect_result)
|
|
139
115
|
) as annotation_context:
|
|
@@ -155,7 +131,7 @@ class DatapointManager:
|
|
|
155
131
|
ann = ImageAnnotation(
|
|
156
132
|
category_name=detect_result.class_name,
|
|
157
133
|
bounding_box=box,
|
|
158
|
-
category_id=detect_result.class_id,
|
|
134
|
+
category_id=detect_result.class_id if detect_result.class_id is not None else DEFAULT_CATEGORY_ID,
|
|
159
135
|
score=detect_result.score,
|
|
160
136
|
service_id=self.service_id,
|
|
161
137
|
model_id=self.model_id,
|
deepdoctection/pipe/common.py
CHANGED
|
@@ -22,6 +22,7 @@ from __future__ import annotations
|
|
|
22
22
|
|
|
23
23
|
import os
|
|
24
24
|
from copy import deepcopy
|
|
25
|
+
from dataclasses import dataclass, field
|
|
25
26
|
from typing import Literal, Mapping, Optional, Sequence, Union
|
|
26
27
|
|
|
27
28
|
import numpy as np
|
|
@@ -49,24 +50,30 @@ class ImageCroppingService(PipelineComponent):
|
|
|
49
50
|
generally not stored.
|
|
50
51
|
"""
|
|
51
52
|
|
|
52
|
-
def __init__(
|
|
53
|
+
def __init__(
|
|
54
|
+
self, category_names: Optional[Union[TypeOrStr, Sequence[TypeOrStr]]] = None,
|
|
55
|
+
service_ids: Optional[Sequence[str]] = None
|
|
56
|
+
) -> None:
|
|
53
57
|
"""
|
|
54
58
|
:param category_names: A single name or a list of category names to crop
|
|
55
59
|
"""
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
60
|
+
if category_names is None:
|
|
61
|
+
self.category_names = None
|
|
62
|
+
else:
|
|
63
|
+
self.category_names = (
|
|
64
|
+
(category_names,)
|
|
65
|
+
if isinstance(category_names, str)
|
|
66
|
+
else tuple(get_type(category_name) for category_name in category_names)
|
|
67
|
+
)
|
|
68
|
+
self.service_ids = service_ids
|
|
62
69
|
super().__init__("image_crop")
|
|
63
70
|
|
|
64
71
|
def serve(self, dp: Image) -> None:
|
|
65
|
-
for ann in dp.get_annotation(category_names=self.category_names):
|
|
72
|
+
for ann in dp.get_annotation(category_names=self.category_names, service_ids=self.service_ids):
|
|
66
73
|
dp.image_ann_to_image(ann.annotation_id, crop_image=True)
|
|
67
74
|
|
|
68
75
|
def clone(self) -> ImageCroppingService:
|
|
69
|
-
return self.__class__(self.category_names)
|
|
76
|
+
return self.__class__(self.category_names, self.service_ids)
|
|
70
77
|
|
|
71
78
|
def get_meta_annotation(self) -> MetaAnnotation:
|
|
72
79
|
return MetaAnnotation(image_annotations=(), sub_categories={}, relationships={}, summaries=())
|
|
@@ -124,8 +131,10 @@ class IntersectionMatcher:
|
|
|
124
131
|
def match(
|
|
125
132
|
self,
|
|
126
133
|
dp: Image,
|
|
127
|
-
parent_categories: Union[TypeOrStr, Sequence[TypeOrStr]],
|
|
128
|
-
child_categories: Union[TypeOrStr, Sequence[TypeOrStr]],
|
|
134
|
+
parent_categories: Optional[Union[TypeOrStr, Sequence[TypeOrStr]]] = None,
|
|
135
|
+
child_categories: Optional[Union[TypeOrStr, Sequence[TypeOrStr]]] = None,
|
|
136
|
+
parent_ann_service_ids: Optional[Union[str, Sequence[str]]] = None,
|
|
137
|
+
child_ann_service_ids: Optional[Union[str, Sequence[str]]] = None,
|
|
129
138
|
) -> list[tuple[str, str]]:
|
|
130
139
|
"""
|
|
131
140
|
The matching algorithm
|
|
@@ -133,6 +142,10 @@ class IntersectionMatcher:
|
|
|
133
142
|
:param dp: datapoint image
|
|
134
143
|
:param parent_categories: list of categories to be used a for parent class. Will generate a child-relationship
|
|
135
144
|
:param child_categories: list of categories to be used for a child class.
|
|
145
|
+
:param parent_ann_service_ids: Additional filter condition. If some ids are selected, it will ignore all other
|
|
146
|
+
parent candidates which are not in the list.
|
|
147
|
+
:param child_ann_service_ids: Additional filter condition. If some ids are selected, it will ignore all other
|
|
148
|
+
children candidates which are not in the list.
|
|
136
149
|
|
|
137
150
|
:return: A list of tuples with parent and child annotation ids
|
|
138
151
|
"""
|
|
@@ -144,6 +157,8 @@ class IntersectionMatcher:
|
|
|
144
157
|
threshold=self.threshold,
|
|
145
158
|
use_weighted_intersections=self.use_weighted_intersections,
|
|
146
159
|
max_parent_only=self.max_parent_only,
|
|
160
|
+
parent_ann_service_ids=parent_ann_service_ids,
|
|
161
|
+
child_ann_service_ids=child_ann_service_ids,
|
|
147
162
|
)
|
|
148
163
|
|
|
149
164
|
matched_child_anns = np.take(child_anns, child_index) # type: ignore
|
|
@@ -174,8 +189,10 @@ class NeighbourMatcher:
|
|
|
174
189
|
def match(
|
|
175
190
|
self,
|
|
176
191
|
dp: Image,
|
|
177
|
-
parent_categories: Union[TypeOrStr, Sequence[TypeOrStr]],
|
|
178
|
-
child_categories: Union[TypeOrStr, Sequence[TypeOrStr]],
|
|
192
|
+
parent_categories: Optional[Union[TypeOrStr, Sequence[TypeOrStr]]] = None,
|
|
193
|
+
child_categories: Optional[Union[TypeOrStr, Sequence[TypeOrStr]]] = None,
|
|
194
|
+
parent_ann_service_ids: Optional[Union[str, Sequence[str]]] = None,
|
|
195
|
+
child_ann_service_ids: Optional[Union[str, Sequence[str]]] = None,
|
|
179
196
|
) -> list[tuple[str, str]]:
|
|
180
197
|
"""
|
|
181
198
|
The matching algorithm
|
|
@@ -183,16 +200,54 @@ class NeighbourMatcher:
|
|
|
183
200
|
:param dp: datapoint image
|
|
184
201
|
:param parent_categories: list of categories to be used a for parent class. Will generate a child-relationship
|
|
185
202
|
:param child_categories: list of categories to be used for a child class.
|
|
203
|
+
:param parent_ann_service_ids: Additional filter condition. If some ids are selected, it will ignore all other
|
|
204
|
+
parent candidates which are not in the list.
|
|
205
|
+
:param child_ann_service_ids: Additional filter condition. If some ids are selected, it will ignore all other
|
|
206
|
+
children candidates which are not in the list.
|
|
186
207
|
|
|
187
208
|
:return: A list of tuples with parent and child annotation ids
|
|
188
209
|
"""
|
|
189
210
|
|
|
190
211
|
return [
|
|
191
212
|
(pair[0].annotation_id, pair[1].annotation_id)
|
|
192
|
-
for pair in match_anns_by_distance(
|
|
213
|
+
for pair in match_anns_by_distance(
|
|
214
|
+
dp,
|
|
215
|
+
parent_ann_category_names=parent_categories,
|
|
216
|
+
child_ann_category_names=child_categories,
|
|
217
|
+
parent_ann_service_ids=parent_ann_service_ids,
|
|
218
|
+
child_ann_service_ids=child_ann_service_ids,
|
|
219
|
+
)
|
|
193
220
|
]
|
|
194
221
|
|
|
195
222
|
|
|
223
|
+
@dataclass
|
|
224
|
+
class FamilyCompound:
|
|
225
|
+
"""
|
|
226
|
+
A family compound is a set of parent and child categories that are related by a relationship key. The parent
|
|
227
|
+
categories will receive a relationship to the child categories.
|
|
228
|
+
"""
|
|
229
|
+
|
|
230
|
+
relationship_key: Relationships
|
|
231
|
+
parent_categories: Optional[Union[ObjectTypes, Sequence[ObjectTypes]]] = field(default=None)
|
|
232
|
+
child_categories: Optional[Union[ObjectTypes, Sequence[ObjectTypes]]] = field(default=None)
|
|
233
|
+
parent_ann_service_ids: Optional[Union[str, Sequence[str]]] = field(default=None)
|
|
234
|
+
child_ann_service_ids: Optional[Union[str, Sequence[str]]] = field(default=None)
|
|
235
|
+
|
|
236
|
+
def __post_init__(self) -> None:
|
|
237
|
+
if isinstance(self.parent_categories, str):
|
|
238
|
+
self.parent_categories = (get_type(self.parent_categories),)
|
|
239
|
+
elif self.parent_categories is not None:
|
|
240
|
+
self.parent_categories = tuple(get_type(parent) for parent in self.parent_categories)
|
|
241
|
+
if isinstance(self.child_categories, str):
|
|
242
|
+
self.child_categories = (get_type(self.child_categories),)
|
|
243
|
+
elif self.child_categories is not None:
|
|
244
|
+
self.child_categories = tuple(get_type(child) for child in self.child_categories)
|
|
245
|
+
if isinstance(self.parent_ann_service_ids, str):
|
|
246
|
+
self.parent_ann_service_ids = (self.parent_ann_service_ids,)
|
|
247
|
+
if isinstance(self.child_ann_service_ids, str):
|
|
248
|
+
self.child_ann_service_ids = (self.child_ann_service_ids,)
|
|
249
|
+
|
|
250
|
+
|
|
196
251
|
@pipeline_component_registry.register("MatchingService")
|
|
197
252
|
class MatchingService(PipelineComponent):
|
|
198
253
|
"""
|
|
@@ -202,28 +257,15 @@ class MatchingService(PipelineComponent):
|
|
|
202
257
|
|
|
203
258
|
def __init__(
|
|
204
259
|
self,
|
|
205
|
-
|
|
206
|
-
child_categories: Union[TypeOrStr, Sequence[TypeOrStr]],
|
|
260
|
+
family_compounds: Sequence[FamilyCompound],
|
|
207
261
|
matcher: Union[IntersectionMatcher, NeighbourMatcher],
|
|
208
|
-
relationship_key: Relationships,
|
|
209
262
|
) -> None:
|
|
210
263
|
"""
|
|
211
|
-
:param
|
|
212
|
-
:param
|
|
213
|
-
|
|
264
|
+
:param family_compounds: A list of FamilyCompounds
|
|
265
|
+
:param matcher: A matcher object
|
|
214
266
|
"""
|
|
215
|
-
self.
|
|
216
|
-
(get_type(parent_categories),)
|
|
217
|
-
if isinstance(parent_categories, str)
|
|
218
|
-
else tuple(get_type(category_name) for category_name in parent_categories)
|
|
219
|
-
)
|
|
220
|
-
self.child_categories = (
|
|
221
|
-
(get_type(child_categories),)
|
|
222
|
-
if isinstance(child_categories, str)
|
|
223
|
-
else (tuple(get_type(category_name) for category_name in child_categories))
|
|
224
|
-
)
|
|
267
|
+
self.family_compounds = family_compounds
|
|
225
268
|
self.matcher = matcher
|
|
226
|
-
self.relationship_key = relationship_key
|
|
227
269
|
super().__init__("matching")
|
|
228
270
|
|
|
229
271
|
def serve(self, dp: Image) -> None:
|
|
@@ -233,20 +275,31 @@ class MatchingService(PipelineComponent):
|
|
|
233
275
|
|
|
234
276
|
:param dp: datapoint image
|
|
235
277
|
"""
|
|
236
|
-
|
|
237
|
-
|
|
238
|
-
|
|
239
|
-
|
|
240
|
-
|
|
278
|
+
for family_compound in self.family_compounds:
|
|
279
|
+
matched_pairs = self.matcher.match(
|
|
280
|
+
dp,
|
|
281
|
+
parent_categories=family_compound.parent_categories,
|
|
282
|
+
child_categories=family_compound.child_categories,
|
|
283
|
+
parent_ann_service_ids=family_compound.parent_ann_service_ids,
|
|
284
|
+
child_ann_service_ids=family_compound.child_ann_service_ids,
|
|
285
|
+
)
|
|
286
|
+
|
|
287
|
+
for pair in matched_pairs:
|
|
288
|
+
self.dp_manager.set_relationship_annotation(family_compound.relationship_key, pair[0], pair[1])
|
|
241
289
|
|
|
242
290
|
def clone(self) -> PipelineComponent:
|
|
243
|
-
return self.__class__(self.
|
|
291
|
+
return self.__class__(self.family_compounds, self.matcher)
|
|
244
292
|
|
|
245
293
|
def get_meta_annotation(self) -> MetaAnnotation:
|
|
294
|
+
relationships: dict[ObjectTypes, set[ObjectTypes]] = {}
|
|
295
|
+
for family_compound in self.family_compounds:
|
|
296
|
+
if family_compound.parent_categories is not None:
|
|
297
|
+
for parent_category in family_compound.parent_categories:
|
|
298
|
+
relationships[parent_category] = {family_compound.relationship_key} # type: ignore
|
|
246
299
|
return MetaAnnotation(
|
|
247
300
|
image_annotations=(),
|
|
248
301
|
sub_categories={},
|
|
249
|
-
relationships=
|
|
302
|
+
relationships=relationships,
|
|
250
303
|
summaries=(),
|
|
251
304
|
)
|
|
252
305
|
|
deepdoctection/pipe/layout.py
CHANGED
|
@@ -20,18 +20,41 @@ Module for layout pipeline component
|
|
|
20
20
|
"""
|
|
21
21
|
from __future__ import annotations
|
|
22
22
|
|
|
23
|
-
from typing import Optional
|
|
23
|
+
from typing import Optional, Sequence, Union
|
|
24
24
|
|
|
25
25
|
import numpy as np
|
|
26
26
|
|
|
27
27
|
from ..datapoint.image import Image
|
|
28
28
|
from ..extern.base import ObjectDetector, PdfMiner
|
|
29
|
+
from ..mapper.misc import curry
|
|
29
30
|
from ..utils.error import ImageError
|
|
31
|
+
from ..utils.settings import ObjectTypes
|
|
30
32
|
from ..utils.transform import PadTransform
|
|
31
33
|
from .base import MetaAnnotation, PipelineComponent
|
|
32
34
|
from .registry import pipeline_component_registry
|
|
33
35
|
|
|
34
36
|
|
|
37
|
+
@curry
|
|
38
|
+
def skip_if_category_or_service_extracted(
|
|
39
|
+
dp: Image,
|
|
40
|
+
category_names: Optional[Union[str, Sequence[ObjectTypes]]] = None,
|
|
41
|
+
service_ids: Optional[Union[str, Sequence[str]]] = None,
|
|
42
|
+
) -> bool:
|
|
43
|
+
"""
|
|
44
|
+
Skip the processing of the pipeline component if the category or service is already extracted.
|
|
45
|
+
|
|
46
|
+
**Example**
|
|
47
|
+
|
|
48
|
+
detector = # some detector
|
|
49
|
+
item_component = ImageLayoutService(detector)
|
|
50
|
+
item_component.set_inbound_filter(skip_if_category_or_service_extracted(detector.get_categories(as_dict=False)))
|
|
51
|
+
"""
|
|
52
|
+
|
|
53
|
+
if dp.get_annotation(category_names=category_names, service_ids=service_ids):
|
|
54
|
+
return True
|
|
55
|
+
return False
|
|
56
|
+
|
|
57
|
+
|
|
35
58
|
@pipeline_component_registry.register("ImageLayoutService")
|
|
36
59
|
class ImageLayoutService(PipelineComponent):
|
|
37
60
|
"""
|
|
@@ -45,7 +68,7 @@ class ImageLayoutService(PipelineComponent):
|
|
|
45
68
|
|
|
46
69
|
**Example**
|
|
47
70
|
|
|
48
|
-
d_items = TPFrcnnDetector(item_config_path, item_weights_path, {
|
|
71
|
+
d_items = TPFrcnnDetector(item_config_path, item_weights_path, {1: 'row', 2: 'column'})
|
|
49
72
|
item_component = ImageLayoutService(d_items)
|
|
50
73
|
"""
|
|
51
74
|
|
|
@@ -55,7 +78,6 @@ class ImageLayoutService(PipelineComponent):
|
|
|
55
78
|
to_image: bool = False,
|
|
56
79
|
crop_image: bool = False,
|
|
57
80
|
padder: Optional[PadTransform] = None,
|
|
58
|
-
skip_if_layout_extracted: bool = False,
|
|
59
81
|
):
|
|
60
82
|
"""
|
|
61
83
|
:param layout_detector: object detector
|
|
@@ -65,23 +87,14 @@ class ImageLayoutService(PipelineComponent):
|
|
|
65
87
|
to its bounding box and populate the resulting sub image to
|
|
66
88
|
`ImageAnnotation.image.image`.
|
|
67
89
|
:param padder: If not `None`, will apply the padder to the image before prediction and inverse apply the padder
|
|
68
|
-
:param skip_if_layout_extracted: When `True` will check, if there are already `ImageAnnotation` of a category
|
|
69
|
-
available that will be predicted by the `layout_detector`. If yes, will skip
|
|
70
|
-
the prediction process.
|
|
71
90
|
"""
|
|
72
91
|
self.to_image = to_image
|
|
73
92
|
self.crop_image = crop_image
|
|
74
93
|
self.padder = padder
|
|
75
|
-
self.skip_if_layout_extracted = skip_if_layout_extracted
|
|
76
94
|
self.predictor = layout_detector
|
|
77
95
|
super().__init__(self._get_name(layout_detector.name), self.predictor.model_id)
|
|
78
96
|
|
|
79
97
|
def serve(self, dp: Image) -> None:
|
|
80
|
-
if self.skip_if_layout_extracted:
|
|
81
|
-
categories = self.predictor.get_category_names()
|
|
82
|
-
anns = dp.get_annotation(category_names=categories)
|
|
83
|
-
if anns:
|
|
84
|
-
return
|
|
85
98
|
if dp.image is None:
|
|
86
99
|
raise ImageError("image cannot be None")
|
|
87
100
|
np_image = dp.image
|
|
@@ -117,7 +130,7 @@ class ImageLayoutService(PipelineComponent):
|
|
|
117
130
|
padder_clone = self.padder.clone()
|
|
118
131
|
if not isinstance(predictor, ObjectDetector):
|
|
119
132
|
raise TypeError(f"predictor must be of type ObjectDetector, but is of type {type(predictor)}")
|
|
120
|
-
return self.__class__(predictor, self.to_image, self.crop_image, padder_clone
|
|
133
|
+
return self.__class__(predictor, self.to_image, self.crop_image, padder_clone)
|
|
121
134
|
|
|
122
135
|
def clear_predictor(self) -> None:
|
|
123
136
|
self.predictor.clear_model()
|
deepdoctection/pipe/order.py
CHANGED
|
@@ -347,19 +347,15 @@ class TextLineGenerator:
|
|
|
347
347
|
a paragraph break threshold. This allows to detect a multi column structure just by observing sub lines.
|
|
348
348
|
"""
|
|
349
349
|
|
|
350
|
-
def __init__(
|
|
351
|
-
self, make_sub_lines: bool, line_category_id: Union[int, str], paragraph_break: Optional[float] = None
|
|
352
|
-
):
|
|
350
|
+
def __init__(self, make_sub_lines: bool, paragraph_break: Optional[float] = None):
|
|
353
351
|
"""
|
|
354
352
|
:param make_sub_lines: Whether to build sub lines from lines.
|
|
355
|
-
:param line_category_id: category_id to give a text line
|
|
356
353
|
:param paragraph_break: threshold of two consecutive words. If distance is larger than threshold, two sub-lines
|
|
357
354
|
will be built. We use relative coordinates to calculate the distance between two
|
|
358
355
|
consecutive words. A reasonable value is 0.035
|
|
359
356
|
"""
|
|
360
357
|
if make_sub_lines and paragraph_break is None:
|
|
361
358
|
raise ValueError("You must specify paragraph_break when setting make_sub_lines to True")
|
|
362
|
-
self.line_category_id = int(line_category_id)
|
|
363
359
|
self.make_sub_lines = make_sub_lines
|
|
364
360
|
self.paragraph_break = paragraph_break
|
|
365
361
|
|
|
@@ -367,7 +363,6 @@ class TextLineGenerator:
|
|
|
367
363
|
return DetectionResult(
|
|
368
364
|
box=box.to_list(mode="xyxy"),
|
|
369
365
|
class_name=LayoutType.LINE,
|
|
370
|
-
class_id=self.line_category_id,
|
|
371
366
|
absolute_coords=box.absolute_coords,
|
|
372
367
|
relationships=relationships,
|
|
373
368
|
)
|
|
@@ -475,18 +470,14 @@ class TextLineServiceMixin(PipelineComponent, ABC):
|
|
|
475
470
|
def __init__(
|
|
476
471
|
self,
|
|
477
472
|
name: str,
|
|
478
|
-
line_category_id: int = 1,
|
|
479
473
|
include_residual_text_container: bool = True,
|
|
480
474
|
paragraph_break: Optional[float] = None,
|
|
481
475
|
):
|
|
482
476
|
"""
|
|
483
|
-
Initialize the
|
|
477
|
+
Initialize the TextLineServiceMixin with a TextLineGenerator instance.
|
|
484
478
|
"""
|
|
485
|
-
self.line_category_id = line_category_id
|
|
486
479
|
self.include_residual_text_container = include_residual_text_container
|
|
487
|
-
self.text_line_generator = TextLineGenerator(
|
|
488
|
-
self.include_residual_text_container, self.line_category_id, paragraph_break
|
|
489
|
-
)
|
|
480
|
+
self.text_line_generator = TextLineGenerator(self.include_residual_text_container, paragraph_break)
|
|
490
481
|
super().__init__(name)
|
|
491
482
|
|
|
492
483
|
def _create_lines_for_words(self, word_anns: Sequence[ImageAnnotation]) -> Sequence[ImageAnnotation]:
|
|
@@ -523,17 +514,15 @@ class TextLineService(TextLineServiceMixin):
|
|
|
523
514
|
text lines and the words contained in the text lines. The reading order is not arranged.
|
|
524
515
|
"""
|
|
525
516
|
|
|
526
|
-
def __init__(self,
|
|
517
|
+
def __init__(self, paragraph_break: Optional[float] = None):
|
|
527
518
|
"""
|
|
528
519
|
Initialize `TextLineService`
|
|
529
520
|
|
|
530
|
-
:param line_category_id: category_id to give a text line
|
|
531
521
|
:param paragraph_break: threshold of two consecutive words. If distance is larger than threshold, two sublines
|
|
532
522
|
will be built
|
|
533
523
|
"""
|
|
534
524
|
super().__init__(
|
|
535
525
|
name="text_line",
|
|
536
|
-
line_category_id=line_category_id,
|
|
537
526
|
include_residual_text_container=True,
|
|
538
527
|
paragraph_break=paragraph_break,
|
|
539
528
|
)
|
|
@@ -542,7 +531,7 @@ class TextLineService(TextLineServiceMixin):
|
|
|
542
531
|
"""
|
|
543
532
|
This method returns a new instance of the class with the same configuration.
|
|
544
533
|
"""
|
|
545
|
-
return self.__class__(self.
|
|
534
|
+
return self.__class__(self.text_line_generator.paragraph_break)
|
|
546
535
|
|
|
547
536
|
def serve(self, dp: Image) -> None:
|
|
548
537
|
text_container_anns = dp.get_annotation(category_names=LayoutType.WORD)
|
|
@@ -605,7 +594,6 @@ class TextOrderService(TextLineServiceMixin):
|
|
|
605
594
|
broken_line_tolerance: float = 0.003,
|
|
606
595
|
height_tolerance: float = 2.0,
|
|
607
596
|
paragraph_break: Optional[float] = 0.035,
|
|
608
|
-
line_category_id: int = 1,
|
|
609
597
|
):
|
|
610
598
|
"""
|
|
611
599
|
:param text_container: name of an image annotation that has a CHARS sub category. These annotations will be
|
|
@@ -647,12 +635,9 @@ class TextOrderService(TextLineServiceMixin):
|
|
|
647
635
|
self.floating_text_block_categories = self.floating_text_block_categories + (LayoutType.LINE,)
|
|
648
636
|
self.include_residual_text_container = include_residual_text_container
|
|
649
637
|
self.order_generator = OrderGenerator(starting_point_tolerance, broken_line_tolerance, height_tolerance)
|
|
650
|
-
self.text_line_generator = TextLineGenerator(
|
|
651
|
-
self.include_residual_text_container, line_category_id, paragraph_break
|
|
652
|
-
)
|
|
638
|
+
self.text_line_generator = TextLineGenerator(self.include_residual_text_container, paragraph_break)
|
|
653
639
|
super().__init__(
|
|
654
640
|
name="text_order",
|
|
655
|
-
line_category_id=line_category_id,
|
|
656
641
|
include_residual_text_container=include_residual_text_container,
|
|
657
642
|
paragraph_break=paragraph_break,
|
|
658
643
|
)
|
|
@@ -763,7 +748,6 @@ class TextOrderService(TextLineServiceMixin):
|
|
|
763
748
|
self.order_generator.broken_line_tolerance,
|
|
764
749
|
self.order_generator.height_tolerance,
|
|
765
750
|
self.text_line_generator.paragraph_break,
|
|
766
|
-
self.text_line_generator.line_category_id,
|
|
767
751
|
)
|
|
768
752
|
|
|
769
753
|
def clear_predictor(self) -> None:
|
deepdoctection/pipe/segment.py
CHANGED
|
@@ -436,24 +436,24 @@ def segment_table(
|
|
|
436
436
|
child_ann_ids = table.get_relationship(Relationships.CHILD)
|
|
437
437
|
cell_index_rows, row_index, _, _ = match_anns_by_intersection(
|
|
438
438
|
dp,
|
|
439
|
-
item_names[0],
|
|
440
|
-
cell_names,
|
|
441
|
-
segment_rule,
|
|
442
|
-
threshold_rows,
|
|
443
|
-
True,
|
|
444
|
-
child_ann_ids,
|
|
445
|
-
child_ann_ids,
|
|
439
|
+
parent_ann_category_names=item_names[0],
|
|
440
|
+
child_ann_category_names=cell_names,
|
|
441
|
+
matching_rule=segment_rule,
|
|
442
|
+
threshold=threshold_rows,
|
|
443
|
+
use_weighted_intersections=True,
|
|
444
|
+
parent_ann_ids=child_ann_ids,
|
|
445
|
+
child_ann_ids=child_ann_ids,
|
|
446
446
|
)
|
|
447
447
|
|
|
448
448
|
cell_index_cols, col_index, _, _ = match_anns_by_intersection(
|
|
449
449
|
dp,
|
|
450
|
-
item_names[1],
|
|
451
|
-
cell_names,
|
|
452
|
-
segment_rule,
|
|
453
|
-
threshold_cols,
|
|
454
|
-
True,
|
|
455
|
-
child_ann_ids,
|
|
456
|
-
child_ann_ids,
|
|
450
|
+
parent_ann_category_names=item_names[1],
|
|
451
|
+
child_ann_category_names=cell_names,
|
|
452
|
+
matching_rule=segment_rule,
|
|
453
|
+
threshold=threshold_cols,
|
|
454
|
+
use_weighted_intersections=True,
|
|
455
|
+
parent_ann_ids=child_ann_ids,
|
|
456
|
+
child_ann_ids=child_ann_ids,
|
|
457
457
|
)
|
|
458
458
|
|
|
459
459
|
cells = dp.get_annotation(annotation_ids=child_ann_ids, category_names=cell_names)
|
|
@@ -499,7 +499,6 @@ def create_intersection_cells(
|
|
|
499
499
|
rows: Sequence[ImageAnnotation],
|
|
500
500
|
cols: Sequence[ImageAnnotation],
|
|
501
501
|
table_annotation_id: str,
|
|
502
|
-
cell_class_id: int,
|
|
503
502
|
sub_item_names: Sequence[ObjectTypes],
|
|
504
503
|
) -> tuple[Sequence[DetectionResult], Sequence[SegmentationResult]]:
|
|
505
504
|
"""
|
|
@@ -509,7 +508,6 @@ def create_intersection_cells(
|
|
|
509
508
|
:param rows: list of rows
|
|
510
509
|
:param cols: list of columns
|
|
511
510
|
:param table_annotation_id: annotation_id of underlying table ImageAnnotation
|
|
512
|
-
:param cell_class_id: The class_id to a synthetically generated DetectionResult
|
|
513
511
|
:param sub_item_names: ObjectTypes for row-/column number
|
|
514
512
|
:return: Pair of lists of `DetectionResult` and `SegmentationResult`.
|
|
515
513
|
"""
|
|
@@ -526,7 +524,6 @@ def create_intersection_cells(
|
|
|
526
524
|
detect_result_cells.append(
|
|
527
525
|
DetectionResult(
|
|
528
526
|
box=boxes_cells[idx].to_list(mode="xyxy"),
|
|
529
|
-
class_id=cell_class_id,
|
|
530
527
|
absolute_coords=boxes_cells[idx].absolute_coords,
|
|
531
528
|
class_name=LayoutType.CELL,
|
|
532
529
|
)
|
|
@@ -574,13 +571,13 @@ def header_cell_to_item_detect_result(
|
|
|
574
571
|
child_ann_ids = table.get_relationship(Relationships.CHILD)
|
|
575
572
|
item_index, _, items, _ = match_anns_by_intersection(
|
|
576
573
|
dp,
|
|
577
|
-
item_header_name,
|
|
578
|
-
item_name,
|
|
579
|
-
segment_rule,
|
|
580
|
-
threshold,
|
|
581
|
-
True,
|
|
582
|
-
child_ann_ids,
|
|
583
|
-
child_ann_ids,
|
|
574
|
+
parent_ann_category_names=item_header_name,
|
|
575
|
+
child_ann_category_names=item_name,
|
|
576
|
+
matching_rule=segment_rule,
|
|
577
|
+
threshold=threshold,
|
|
578
|
+
use_weighted_intersections=True,
|
|
579
|
+
parent_ann_ids=child_ann_ids,
|
|
580
|
+
child_ann_ids=child_ann_ids,
|
|
584
581
|
)
|
|
585
582
|
item_headers = []
|
|
586
583
|
for idx, item in enumerate(items):
|
|
@@ -622,24 +619,24 @@ def segment_pubtables(
|
|
|
622
619
|
child_ann_ids = table.get_relationship(Relationships.CHILD)
|
|
623
620
|
cell_index_rows, row_index, _, _ = match_anns_by_intersection(
|
|
624
621
|
dp,
|
|
625
|
-
item_names[0],
|
|
626
|
-
spanning_cell_names,
|
|
627
|
-
segment_rule,
|
|
628
|
-
threshold_rows,
|
|
629
|
-
True,
|
|
630
|
-
child_ann_ids,
|
|
631
|
-
child_ann_ids,
|
|
622
|
+
parent_ann_category_names=item_names[0],
|
|
623
|
+
child_ann_category_names=spanning_cell_names,
|
|
624
|
+
matching_rule=segment_rule,
|
|
625
|
+
threshold=threshold_rows,
|
|
626
|
+
use_weighted_intersections=True,
|
|
627
|
+
parent_ann_ids=child_ann_ids,
|
|
628
|
+
child_ann_ids=child_ann_ids,
|
|
632
629
|
)
|
|
633
630
|
|
|
634
631
|
cell_index_cols, col_index, _, _ = match_anns_by_intersection(
|
|
635
632
|
dp,
|
|
636
|
-
item_names[1],
|
|
637
|
-
spanning_cell_names,
|
|
638
|
-
segment_rule,
|
|
639
|
-
threshold_cols,
|
|
640
|
-
True,
|
|
641
|
-
child_ann_ids,
|
|
642
|
-
child_ann_ids,
|
|
633
|
+
parent_ann_category_names=item_names[1],
|
|
634
|
+
child_ann_category_names=spanning_cell_names,
|
|
635
|
+
matching_rule=segment_rule,
|
|
636
|
+
threshold=threshold_cols,
|
|
637
|
+
use_weighted_intersections=True,
|
|
638
|
+
parent_ann_ids=child_ann_ids,
|
|
639
|
+
child_ann_ids=child_ann_ids,
|
|
643
640
|
)
|
|
644
641
|
|
|
645
642
|
spanning_cells = dp.get_annotation(annotation_ids=child_ann_ids, category_names=spanning_cell_names)
|
|
@@ -976,7 +973,6 @@ class PubtablesSegmentationService(PipelineComponent):
|
|
|
976
973
|
tile_table_with_items: bool,
|
|
977
974
|
remove_iou_threshold_rows: float,
|
|
978
975
|
remove_iou_threshold_cols: float,
|
|
979
|
-
cell_class_id: int,
|
|
980
976
|
table_name: TypeOrStr,
|
|
981
977
|
cell_names: Sequence[TypeOrStr],
|
|
982
978
|
spanning_cell_names: Sequence[TypeOrStr],
|
|
@@ -997,7 +993,6 @@ class PubtablesSegmentationService(PipelineComponent):
|
|
|
997
993
|
the adjacent row. Will do a similar shifting with columns.
|
|
998
994
|
:param remove_iou_threshold_rows: iou threshold for removing overlapping rows
|
|
999
995
|
:param remove_iou_threshold_cols: iou threshold for removing overlapping columns
|
|
1000
|
-
:param cell_class_id: 'category_id' for cells to be generated from intersected rows and columns
|
|
1001
996
|
:param table_name: layout type table
|
|
1002
997
|
:param cell_names: layout type of cells
|
|
1003
998
|
:param spanning_cell_names: layout type of spanning cells
|
|
@@ -1022,7 +1017,6 @@ class PubtablesSegmentationService(PipelineComponent):
|
|
|
1022
1017
|
self.spanning_cell_names = [get_type(cell_name) for cell_name in spanning_cell_names]
|
|
1023
1018
|
self.remove_iou_threshold_rows = remove_iou_threshold_rows
|
|
1024
1019
|
self.remove_iou_threshold_cols = remove_iou_threshold_cols
|
|
1025
|
-
self.cell_class_id = cell_class_id
|
|
1026
1020
|
self.cell_to_image = cell_to_image
|
|
1027
1021
|
self.crop_cell_image = crop_cell_image
|
|
1028
1022
|
self.item_names = [get_type(item_name) for item_name in item_names] # row names must be before column name
|
|
@@ -1089,7 +1083,7 @@ class PubtablesSegmentationService(PipelineComponent):
|
|
|
1089
1083
|
rows = dp.get_annotation(category_names=self.item_names[0], annotation_ids=item_ann_ids)
|
|
1090
1084
|
columns = dp.get_annotation(category_names=self.item_names[1], annotation_ids=item_ann_ids)
|
|
1091
1085
|
detect_result_cells, segment_result_cells = create_intersection_cells(
|
|
1092
|
-
rows, columns, table.annotation_id, self.
|
|
1086
|
+
rows, columns, table.annotation_id, self.sub_item_names
|
|
1093
1087
|
)
|
|
1094
1088
|
cell_rn_cn_to_ann_id = {}
|
|
1095
1089
|
for detect_result, segment_result in zip(detect_result_cells, segment_result_cells):
|
|
@@ -1228,7 +1222,6 @@ class PubtablesSegmentationService(PipelineComponent):
|
|
|
1228
1222
|
self.tile_table,
|
|
1229
1223
|
self.remove_iou_threshold_rows,
|
|
1230
1224
|
self.remove_iou_threshold_cols,
|
|
1231
|
-
self.cell_class_id,
|
|
1232
1225
|
self.table_name,
|
|
1233
1226
|
self.cell_names,
|
|
1234
1227
|
self.spanning_cell_names,
|
|
@@ -92,7 +92,6 @@ class DetectResultGenerator:
|
|
|
92
92
|
detect_result_list.append(
|
|
93
93
|
DetectionResult(
|
|
94
94
|
box=[0.0, 0.0, float(self.width), float(self.height)], # type: ignore
|
|
95
|
-
class_id=self.categories_name_as_key[category_name],
|
|
96
95
|
class_name=category_name,
|
|
97
96
|
score=0.0,
|
|
98
97
|
absolute_coords=self.absolute_coords,
|
|
@@ -156,14 +155,13 @@ class SubImageLayoutService(PipelineComponent):
|
|
|
156
155
|
detect_result_generator = DetectResultGenerator(categories_items)
|
|
157
156
|
d_items = TPFrcnnDetector(item_config_path, item_weights_path, {"1": LayoutType.row,
|
|
158
157
|
"2": LayoutType.column})
|
|
159
|
-
item_component = SubImageLayoutService(d_items, LayoutType.table,
|
|
158
|
+
item_component = SubImageLayoutService(d_items, LayoutType.table, detect_result_generator)
|
|
160
159
|
"""
|
|
161
160
|
|
|
162
161
|
def __init__(
|
|
163
162
|
self,
|
|
164
163
|
sub_image_detector: ObjectDetector,
|
|
165
164
|
sub_image_names: Union[str, Sequence[TypeOrStr]],
|
|
166
|
-
category_id_mapping: Optional[dict[int, int]] = None,
|
|
167
165
|
detect_result_generator: Optional[DetectResultGenerator] = None,
|
|
168
166
|
padder: Optional[PadTransform] = None,
|
|
169
167
|
):
|
|
@@ -186,7 +184,6 @@ class SubImageLayoutService(PipelineComponent):
|
|
|
186
184
|
if isinstance(sub_image_names, str)
|
|
187
185
|
else tuple((get_type(cat) for cat in sub_image_names))
|
|
188
186
|
)
|
|
189
|
-
self.category_id_mapping = category_id_mapping
|
|
190
187
|
self.detect_result_generator = detect_result_generator
|
|
191
188
|
self.padder = padder
|
|
192
189
|
self.predictor = sub_image_detector
|
|
@@ -223,11 +220,6 @@ class SubImageLayoutService(PipelineComponent):
|
|
|
223
220
|
detect_result_list = self.detect_result_generator.create_detection_result(detect_result_list)
|
|
224
221
|
|
|
225
222
|
for detect_result in detect_result_list:
|
|
226
|
-
if self.category_id_mapping:
|
|
227
|
-
if detect_result.class_id:
|
|
228
|
-
detect_result.class_id = self.category_id_mapping.get(
|
|
229
|
-
detect_result.class_id, detect_result.class_id
|
|
230
|
-
)
|
|
231
223
|
self.dp_manager.set_image_annotation(detect_result, sub_image_ann.annotation_id)
|
|
232
224
|
|
|
233
225
|
def get_meta_annotation(self) -> MetaAnnotation:
|
|
@@ -254,7 +246,6 @@ class SubImageLayoutService(PipelineComponent):
|
|
|
254
246
|
return self.__class__(
|
|
255
247
|
predictor,
|
|
256
248
|
self.sub_image_name,
|
|
257
|
-
self.category_id_mapping,
|
|
258
249
|
self.detect_result_generator,
|
|
259
250
|
padder_clone,
|
|
260
251
|
)
|
deepdoctection/pipe/text.py
CHANGED
|
@@ -70,7 +70,6 @@ class TextExtractionService(PipelineComponent):
|
|
|
70
70
|
text_extract_detector: Union[ObjectDetector, PdfMiner, TextRecognizer],
|
|
71
71
|
extract_from_roi: Optional[Union[Sequence[TypeOrStr], TypeOrStr]] = None,
|
|
72
72
|
run_time_ocr_language_selection: bool = False,
|
|
73
|
-
skip_if_text_extracted: bool = False,
|
|
74
73
|
):
|
|
75
74
|
"""
|
|
76
75
|
:param text_extract_detector: ObjectDetector
|
|
@@ -79,8 +78,6 @@ class TextExtractionService(PipelineComponent):
|
|
|
79
78
|
multiple language selections. Also requires that a language detection
|
|
80
79
|
pipeline component ran before. It will select the expert language OCR
|
|
81
80
|
model based on the determined language.
|
|
82
|
-
:param skip_if_text_extracted: Set to `True` if text has already been extracted in a previous pipeline component
|
|
83
|
-
and should not be extracted again. Use-case: A PDF with some scanned images.
|
|
84
81
|
"""
|
|
85
82
|
|
|
86
83
|
if extract_from_roi is None:
|
|
@@ -104,11 +101,6 @@ class TextExtractionService(PipelineComponent):
|
|
|
104
101
|
raise TypeError("Only TesseractOcrDetector supports multiple languages")
|
|
105
102
|
|
|
106
103
|
self.run_time_ocr_language_selection = run_time_ocr_language_selection
|
|
107
|
-
self.skip_if_text_extracted = skip_if_text_extracted
|
|
108
|
-
if self.skip_if_text_extracted and isinstance(self.predictor, TextRecognizer):
|
|
109
|
-
raise ValueError(
|
|
110
|
-
"skip_if_text_extracted=True and TextRecognizer in TextExtractionService is not compatible"
|
|
111
|
-
)
|
|
112
104
|
|
|
113
105
|
def serve(self, dp: Image) -> None:
|
|
114
106
|
maybe_batched_text_rois = self.get_text_rois(dp)
|
|
@@ -154,11 +146,6 @@ class TextExtractionService(PipelineComponent):
|
|
|
154
146
|
well `get_text_rois` will return an empty list.
|
|
155
147
|
:return: list of ImageAnnotation or Image
|
|
156
148
|
"""
|
|
157
|
-
if self.skip_if_text_extracted:
|
|
158
|
-
text_categories = self.predictor.get_category_names()
|
|
159
|
-
text_anns = dp.get_annotation(category_names=text_categories)
|
|
160
|
-
if text_anns:
|
|
161
|
-
return []
|
|
162
149
|
|
|
163
150
|
if self.extract_from_category:
|
|
164
151
|
if self.predictor.accepts_batch:
|
|
@@ -223,7 +210,11 @@ class TextExtractionService(PipelineComponent):
|
|
|
223
210
|
predictor = self.predictor.clone()
|
|
224
211
|
if not isinstance(predictor, (ObjectDetector, PdfMiner, TextRecognizer)):
|
|
225
212
|
raise ImageError(f"predictor must be of type ObjectDetector or PdfMiner, but is of type {type(predictor)}")
|
|
226
|
-
return self.__class__(
|
|
213
|
+
return self.__class__(
|
|
214
|
+
text_extract_detector=predictor,
|
|
215
|
+
extract_from_roi=deepcopy(self.extract_from_category),
|
|
216
|
+
run_time_ocr_language_selection=self.run_time_ocr_language_selection,
|
|
217
|
+
)
|
|
227
218
|
|
|
228
219
|
def clear_predictor(self) -> None:
|
|
229
220
|
self.predictor.clear_model()
|
|
@@ -1,9 +1,9 @@
|
|
|
1
|
-
deepdoctection/__init__.py,sha256=
|
|
1
|
+
deepdoctection/__init__.py,sha256=Onsg4vkNNIGYytDmH96KsxYt3xQLxcAbyYHCeOqThR8,12780
|
|
2
2
|
deepdoctection/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
3
3
|
deepdoctection/analyzer/__init__.py,sha256=icClxrd20XutD6LxLgEPIWceSs4j_QfI3szCE-9BL2w,729
|
|
4
|
-
deepdoctection/analyzer/_config.py,sha256=
|
|
4
|
+
deepdoctection/analyzer/_config.py,sha256=kxQzDQvl2ygH84VTnumbRF7JLGM6VeJoBzv1xssm6H4,5019
|
|
5
5
|
deepdoctection/analyzer/dd.py,sha256=bfR7e1JV7BwUNDRLu0jYZU7qQXnyA_vbRAJl2Ylrq5o,5905
|
|
6
|
-
deepdoctection/analyzer/factory.py,sha256=
|
|
6
|
+
deepdoctection/analyzer/factory.py,sha256=sXGL_faLkKCUBfq5YIpmzV5cWuvWChYy-zP5OtdaM4Y,33251
|
|
7
7
|
deepdoctection/configs/__init__.py,sha256=TX_P6tqDOF1LK1mi9ruAl7x0mtv1Asm8cYWCz3Pe2dk,646
|
|
8
8
|
deepdoctection/configs/conf_dd_one.yaml,sha256=qnrDAST1PHBtdIKE_hdkZexW22FqVvNTI-PEo9wvinM,3025
|
|
9
9
|
deepdoctection/configs/conf_tesseract.yaml,sha256=oF6szDyoi15FHvq7yFUNIEjfA_jNLhGxoowiRsz_zY4,35
|
|
@@ -19,8 +19,8 @@ deepdoctection/datapoint/__init__.py,sha256=3K406GbOPhoEp8koVaSbMocmSsmWifnSZ1SP
|
|
|
19
19
|
deepdoctection/datapoint/annotation.py,sha256=FEgz4COxVDfjic0gG7kS6iHnWLBIgFnquQ63Cbj2a4Y,22531
|
|
20
20
|
deepdoctection/datapoint/box.py,sha256=XPhC_xHqLZJjzafg1pIS_CxnVB5-0_yk-twsZZ3ncUU,30093
|
|
21
21
|
deepdoctection/datapoint/convert.py,sha256=Be2FvmRXt-5prZ1vwa5fG6VjgEQ_31hiQ13hAoXoaes,7740
|
|
22
|
-
deepdoctection/datapoint/image.py,sha256=
|
|
23
|
-
deepdoctection/datapoint/view.py,sha256=
|
|
22
|
+
deepdoctection/datapoint/image.py,sha256=_jN46UJUsOi6GC6VEUcp3L_vLL-iYRW05RKcFLWb6Dc,34048
|
|
23
|
+
deepdoctection/datapoint/view.py,sha256=iZiHMc2hkk6vWn87LK0Qf-toZU_kocW3m7Wq8M4IS2E,50782
|
|
24
24
|
deepdoctection/datasets/__init__.py,sha256=-A3aR90aDsHPmVM35JavfnQ2itYSCn3ujl4krRni1QU,1076
|
|
25
25
|
deepdoctection/datasets/adapter.py,sha256=Ly_vbOAgVI73V41FUccnSX1ECTOyesW_qsuvQuvOZbw,7796
|
|
26
26
|
deepdoctection/datasets/base.py,sha256=AZx-hw8Mchzb7FiOASt7zCbiybFNsM_diBzKXyC-auU,22618
|
|
@@ -94,7 +94,7 @@ deepdoctection/mapper/d2struct.py,sha256=Dx-YnycsIQH4a5-9Gn_yMhiQ-gOFgMueNeH3rhX
|
|
|
94
94
|
deepdoctection/mapper/hfstruct.py,sha256=2PjGKsYturVJBimLT1CahYh09KSRAFEHz_QNtC162kQ,5551
|
|
95
95
|
deepdoctection/mapper/laylmstruct.py,sha256=abMZkYU2W0e_VcCm_c0ZXNFuv-lfMFWcTedcZS5EYvE,42935
|
|
96
96
|
deepdoctection/mapper/maputils.py,sha256=eI6ZcDg9W5uB6xQNBZpMIdEd86HlCxTtkJuyROdTqiw,8146
|
|
97
|
-
deepdoctection/mapper/match.py,sha256=
|
|
97
|
+
deepdoctection/mapper/match.py,sha256=RDTYSGtbtT8ph3L83PyHIkezJ2K82MwNerSM72uTMxM,10267
|
|
98
98
|
deepdoctection/mapper/misc.py,sha256=vX-fV420Te00eD-cqTiWBV2twHqdBcBV2_7rAFRgPRg,7164
|
|
99
99
|
deepdoctection/mapper/pascalstruct.py,sha256=TzVU1p0oiw0nOuxTFFbEB9vXJxH1v6VUvTJ7MD0manU,3828
|
|
100
100
|
deepdoctection/mapper/prodigystruct.py,sha256=Re4Sd_zAp6qOvbXZLmMJeG0IGEfMQxebuyDeZgMcTa8,6827
|
|
@@ -102,24 +102,24 @@ deepdoctection/mapper/pubstruct.py,sha256=PAJ2N1HSPNS6F2ZrIwlD7PiBhIM-rJscK_Ti8O
|
|
|
102
102
|
deepdoctection/mapper/tpstruct.py,sha256=YNABRibvcISD5Lavg3jouoE4FMdqXEJoM-hNoB_rnww,4481
|
|
103
103
|
deepdoctection/mapper/xfundstruct.py,sha256=_3r3c0K82fnF2h1HxA85h-9ETYrHwcERa6MNc6Ko6Z8,8807
|
|
104
104
|
deepdoctection/pipe/__init__.py,sha256=ywTVoetftdL6plXg2YlBzMfmqBZupq7yXblSVyvvkcQ,1127
|
|
105
|
-
deepdoctection/pipe/anngen.py,sha256=
|
|
105
|
+
deepdoctection/pipe/anngen.py,sha256=7wvp7eghDwrgcIyu1vjRxmVy4SADPbn-k4ud8y2bgjU,15338
|
|
106
106
|
deepdoctection/pipe/base.py,sha256=wlza9aDOKnHKrXmaz8MLyLz0nMqqcIWQ-6Lu944aicE,15390
|
|
107
|
-
deepdoctection/pipe/common.py,sha256=
|
|
107
|
+
deepdoctection/pipe/common.py,sha256=S6-NKvR0sqBfqjN-mH76uVgM_aHOZvhPe_ore36UPZA,21028
|
|
108
108
|
deepdoctection/pipe/concurrency.py,sha256=AAKRsVgaBEYNluntbDa46SBF1JZ_XqnWLDSWrNvAzEo,9657
|
|
109
109
|
deepdoctection/pipe/doctectionpipe.py,sha256=bGW3ugky-fb-nEe-3bvO6Oc_4_6w82cQboGM_6p2eIo,12530
|
|
110
110
|
deepdoctection/pipe/language.py,sha256=5zI0UQC6Fh12_r2pfVL42HoCGz2hpHrOhpXAn5m-rYw,5451
|
|
111
|
-
deepdoctection/pipe/layout.py,sha256=
|
|
111
|
+
deepdoctection/pipe/layout.py,sha256=ThULc0b1f9KyaXYk9z0qbuJ0nhIodah9PcrEq2xKpAY,5670
|
|
112
112
|
deepdoctection/pipe/lm.py,sha256=x9NoYpivdjQF1r76a7PPrUuBEmuHP7ZukuXFDkXhXBc,17572
|
|
113
|
-
deepdoctection/pipe/order.py,sha256=
|
|
113
|
+
deepdoctection/pipe/order.py,sha256=0KNiMinedjfuDVVHxJSaDL1yl4Sub-miMPcEC4gGwPA,39423
|
|
114
114
|
deepdoctection/pipe/refine.py,sha256=dTfI396xydPdbzpfo4yqFcuxl3UAB1y-WbSQn1o76ec,22367
|
|
115
115
|
deepdoctection/pipe/registry.py,sha256=aFx-Tn0xhVA5l5H18duNW5QoTNKQltybsEUEzsMgUfg,902
|
|
116
|
-
deepdoctection/pipe/segment.py,sha256=
|
|
117
|
-
deepdoctection/pipe/sub_layout.py,sha256=
|
|
118
|
-
deepdoctection/pipe/text.py,sha256=
|
|
116
|
+
deepdoctection/pipe/segment.py,sha256=sny59GuP7dxLGX3YjHF0wllPxSiXL1GNQEhMGKcF8ZU,59594
|
|
117
|
+
deepdoctection/pipe/sub_layout.py,sha256=OLKvCYJynoFpo7bf2b3HzY0k-TJDLc0PHveWKcDbqZI,13324
|
|
118
|
+
deepdoctection/pipe/text.py,sha256=tLlJtneM__WsrAvp4pQFqwNlmq2RLqKqiPXlJ2lkniU,10483
|
|
119
119
|
deepdoctection/pipe/transform.py,sha256=9Om7X7hJeL4jgUwHM1CHa4sb5v7Qo1PtVG0ls_3nI7w,3798
|
|
120
120
|
deepdoctection/train/__init__.py,sha256=YFTRAZF1F7cEAKTdAIi1BLyYb6rSRcwq09Ui5Lu8d6E,1071
|
|
121
121
|
deepdoctection/train/d2_frcnn_train.py,sha256=sFc_G-mEpaM8d1CCE0_6Gl4nBh11X2RYRBA3p_ylFJQ,16000
|
|
122
|
-
deepdoctection/train/hf_detr_train.py,sha256=
|
|
122
|
+
deepdoctection/train/hf_detr_train.py,sha256=uBkkRyxrJF5UF__KbYvIlmb-HRWQ9TY6LiJr1Rm56kI,12043
|
|
123
123
|
deepdoctection/train/hf_layoutlm_train.py,sha256=8kiGp_8GEyqCkLgeMgCJOLJWSVoKWkUBHsZtDjZOcRk,22556
|
|
124
124
|
deepdoctection/train/tp_frcnn_train.py,sha256=pEpXokSVGveqo82pRnhnAmHPmjQ_8wQWpqM4ZyNHJgs,13049
|
|
125
125
|
deepdoctection/utils/__init__.py,sha256=brBceRWeov9WXMiJTjyJOF2rHMP8trGGRRjhMdZ61nI,2371
|
|
@@ -141,8 +141,8 @@ deepdoctection/utils/transform.py,sha256=3kCgsEeRkG1efCdkfvj7tUFMs-e2jbjbflq826F
|
|
|
141
141
|
deepdoctection/utils/types.py,sha256=ti4WdtIJSg3TGK_YPkkoY9PYGMnR2tTX6Xfik8U1pNk,2986
|
|
142
142
|
deepdoctection/utils/utils.py,sha256=csVs_VvCq4QBETPoE2JdTTL4MFYnD4xh-Js5vRb612g,6492
|
|
143
143
|
deepdoctection/utils/viz.py,sha256=Jf8ePNYWlpuyaS6SeTYQ4OyA3eNhtgjvAQZnGNdgHC0,27051
|
|
144
|
-
deepdoctection-0.
|
|
145
|
-
deepdoctection-0.
|
|
146
|
-
deepdoctection-0.
|
|
147
|
-
deepdoctection-0.
|
|
148
|
-
deepdoctection-0.
|
|
144
|
+
deepdoctection-0.40.0.dist-info/licenses/LICENSE,sha256=GQ0rUvuGdrMNEI3iHK5UQx6dIMU1QwAuyXsxUHn5MEQ,11351
|
|
145
|
+
deepdoctection-0.40.0.dist-info/METADATA,sha256=YyPBlJBcUfAQP_cW7Mhq3eNs2-924o4BMS4X6Sn0Xwo,19763
|
|
146
|
+
deepdoctection-0.40.0.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
|
|
147
|
+
deepdoctection-0.40.0.dist-info/top_level.txt,sha256=hs2DdoOL9h4mnHhmO82BT4pz4QATIoOZ20PZmlnxFI8,15
|
|
148
|
+
deepdoctection-0.40.0.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|