deepdoctection-0.30-py3-none-any.whl → deepdoctection-0.32-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- deepdoctection/__init__.py +38 -29
- deepdoctection/analyzer/dd.py +36 -29
- deepdoctection/configs/conf_dd_one.yaml +34 -31
- deepdoctection/dataflow/base.py +0 -19
- deepdoctection/dataflow/custom.py +4 -3
- deepdoctection/dataflow/custom_serialize.py +14 -5
- deepdoctection/dataflow/parallel_map.py +12 -11
- deepdoctection/dataflow/serialize.py +5 -4
- deepdoctection/datapoint/annotation.py +35 -13
- deepdoctection/datapoint/box.py +3 -5
- deepdoctection/datapoint/convert.py +3 -1
- deepdoctection/datapoint/image.py +79 -36
- deepdoctection/datapoint/view.py +152 -49
- deepdoctection/datasets/__init__.py +1 -4
- deepdoctection/datasets/adapter.py +6 -3
- deepdoctection/datasets/base.py +86 -11
- deepdoctection/datasets/dataflow_builder.py +1 -1
- deepdoctection/datasets/info.py +4 -4
- deepdoctection/datasets/instances/doclaynet.py +3 -2
- deepdoctection/datasets/instances/fintabnet.py +2 -1
- deepdoctection/datasets/instances/funsd.py +2 -1
- deepdoctection/datasets/instances/iiitar13k.py +5 -2
- deepdoctection/datasets/instances/layouttest.py +4 -8
- deepdoctection/datasets/instances/publaynet.py +2 -2
- deepdoctection/datasets/instances/pubtables1m.py +6 -3
- deepdoctection/datasets/instances/pubtabnet.py +2 -1
- deepdoctection/datasets/instances/rvlcdip.py +2 -1
- deepdoctection/datasets/instances/xfund.py +2 -1
- deepdoctection/eval/__init__.py +1 -4
- deepdoctection/eval/accmetric.py +1 -1
- deepdoctection/eval/base.py +5 -4
- deepdoctection/eval/cocometric.py +2 -1
- deepdoctection/eval/eval.py +19 -15
- deepdoctection/eval/tedsmetric.py +14 -11
- deepdoctection/eval/tp_eval_callback.py +14 -7
- deepdoctection/extern/__init__.py +2 -7
- deepdoctection/extern/base.py +39 -13
- deepdoctection/extern/d2detect.py +182 -90
- deepdoctection/extern/deskew.py +36 -9
- deepdoctection/extern/doctrocr.py +265 -83
- deepdoctection/extern/fastlang.py +49 -9
- deepdoctection/extern/hfdetr.py +106 -55
- deepdoctection/extern/hflayoutlm.py +441 -122
- deepdoctection/extern/hflm.py +225 -0
- deepdoctection/extern/model.py +56 -47
- deepdoctection/extern/pdftext.py +10 -5
- deepdoctection/extern/pt/__init__.py +1 -3
- deepdoctection/extern/pt/nms.py +6 -2
- deepdoctection/extern/pt/ptutils.py +27 -18
- deepdoctection/extern/tessocr.py +134 -22
- deepdoctection/extern/texocr.py +6 -2
- deepdoctection/extern/tp/tfutils.py +43 -9
- deepdoctection/extern/tp/tpcompat.py +14 -11
- deepdoctection/extern/tp/tpfrcnn/__init__.py +20 -0
- deepdoctection/extern/tp/tpfrcnn/common.py +7 -3
- deepdoctection/extern/tp/tpfrcnn/config/__init__.py +20 -0
- deepdoctection/extern/tp/tpfrcnn/config/config.py +9 -6
- deepdoctection/extern/tp/tpfrcnn/modeling/__init__.py +20 -0
- deepdoctection/extern/tp/tpfrcnn/modeling/backbone.py +17 -7
- deepdoctection/extern/tp/tpfrcnn/modeling/generalized_rcnn.py +12 -6
- deepdoctection/extern/tp/tpfrcnn/modeling/model_box.py +9 -4
- deepdoctection/extern/tp/tpfrcnn/modeling/model_cascade.py +8 -5
- deepdoctection/extern/tp/tpfrcnn/modeling/model_fpn.py +16 -11
- deepdoctection/extern/tp/tpfrcnn/modeling/model_frcnn.py +17 -10
- deepdoctection/extern/tp/tpfrcnn/modeling/model_mrcnn.py +14 -8
- deepdoctection/extern/tp/tpfrcnn/modeling/model_rpn.py +15 -10
- deepdoctection/extern/tp/tpfrcnn/predict.py +9 -4
- deepdoctection/extern/tp/tpfrcnn/preproc.py +8 -9
- deepdoctection/extern/tp/tpfrcnn/utils/__init__.py +20 -0
- deepdoctection/extern/tp/tpfrcnn/utils/box_ops.py +10 -2
- deepdoctection/extern/tpdetect.py +54 -30
- deepdoctection/mapper/__init__.py +3 -8
- deepdoctection/mapper/d2struct.py +9 -7
- deepdoctection/mapper/hfstruct.py +7 -2
- deepdoctection/mapper/laylmstruct.py +164 -21
- deepdoctection/mapper/maputils.py +16 -3
- deepdoctection/mapper/misc.py +6 -3
- deepdoctection/mapper/prodigystruct.py +1 -1
- deepdoctection/mapper/pubstruct.py +10 -10
- deepdoctection/mapper/tpstruct.py +3 -3
- deepdoctection/pipe/__init__.py +1 -1
- deepdoctection/pipe/anngen.py +35 -8
- deepdoctection/pipe/base.py +53 -19
- deepdoctection/pipe/common.py +23 -13
- deepdoctection/pipe/concurrency.py +2 -1
- deepdoctection/pipe/doctectionpipe.py +2 -2
- deepdoctection/pipe/language.py +3 -2
- deepdoctection/pipe/layout.py +6 -3
- deepdoctection/pipe/lm.py +34 -66
- deepdoctection/pipe/order.py +142 -35
- deepdoctection/pipe/refine.py +26 -24
- deepdoctection/pipe/segment.py +21 -16
- deepdoctection/pipe/{cell.py → sub_layout.py} +30 -9
- deepdoctection/pipe/text.py +14 -8
- deepdoctection/pipe/transform.py +16 -9
- deepdoctection/train/__init__.py +6 -12
- deepdoctection/train/d2_frcnn_train.py +36 -28
- deepdoctection/train/hf_detr_train.py +26 -17
- deepdoctection/train/hf_layoutlm_train.py +133 -111
- deepdoctection/train/tp_frcnn_train.py +21 -19
- deepdoctection/utils/__init__.py +3 -0
- deepdoctection/utils/concurrency.py +1 -1
- deepdoctection/utils/context.py +2 -2
- deepdoctection/utils/env_info.py +41 -84
- deepdoctection/utils/error.py +84 -0
- deepdoctection/utils/file_utils.py +4 -15
- deepdoctection/utils/fs.py +7 -7
- deepdoctection/utils/logger.py +1 -0
- deepdoctection/utils/mocks.py +93 -0
- deepdoctection/utils/pdf_utils.py +5 -4
- deepdoctection/utils/settings.py +6 -1
- deepdoctection/utils/transform.py +1 -1
- deepdoctection/utils/utils.py +0 -6
- deepdoctection/utils/viz.py +48 -5
- {deepdoctection-0.30.dist-info → deepdoctection-0.32.dist-info}/METADATA +57 -73
- deepdoctection-0.32.dist-info/RECORD +146 -0
- {deepdoctection-0.30.dist-info → deepdoctection-0.32.dist-info}/WHEEL +1 -1
- deepdoctection-0.30.dist-info/RECORD +0 -143
- {deepdoctection-0.30.dist-info → deepdoctection-0.32.dist-info}/LICENSE +0 -0
- {deepdoctection-0.30.dist-info → deepdoctection-0.32.dist-info}/top_level.txt +0 -0
deepdoctection/pipe/order.py
CHANGED
@@ -18,7 +18,10 @@
 """
 Module for ordering text and layout segments pipeline components
 """
+from __future__ import annotations
+
 import os
+from abc import ABC
 from copy import copy
 from itertools import chain
 from logging import DEBUG
@@ -349,10 +352,11 @@ class TextLineGenerator:
         self, make_sub_lines: bool, line_category_id: Union[int, str], paragraph_break: Optional[float] = None
     ):
         """
-        :param make_sub_lines: Whether to build sub lines from lines
+        :param make_sub_lines: Whether to build sub lines from lines.
         :param line_category_id: category_id to give a text line
-        :param paragraph_break: threshold of two consecutive words. If distance is larger than threshold, two
-                                will be built
+        :param paragraph_break: threshold of two consecutive words. If distance is larger than threshold, two sub-lines
+                                will be built. We use relative coordinates to calculate the distance between two
+                                consecutive words. A reasonable value is 0.035
         """
         if make_sub_lines and paragraph_break is None:
             raise ValueError("You must specify paragraph_break when setting make_sub_lines to True")
@@ -375,6 +379,7 @@ class TextLineGenerator:
         image_width: float,
         image_height: float,
         image_id: Optional[str] = None,
+        highest_level: bool = True,
     ) -> Sequence[DetectionResult]:
         """
         Creating detecting result of lines (or sub lines) from given word type `ImageAnnotation`.
@@ -392,6 +397,8 @@ class TextLineGenerator:
         # list of (word index, text line, word annotation_id)
         word_order_list = OrderGenerator.group_words_into_lines(word_anns, image_id)
         number_rows = max(word[1] for word in word_order_list)
+        if number_rows == 1 and not highest_level:
+            return []
         detection_result_list = []
         for number_row in range(1, number_rows + 1):
             # list of (word index, text line, word annotation_id) for text line equal to number_row
@@ -423,29 +430,141 @@
                 if current_box.absolute_coords:
                     current_box = current_box.transform(image_width, image_height)

-                # If distance between boxes is lower than paragraph break, same sub
+                # If distance between boxes is lower than paragraph break, same sub-line
                 if current_box.ulx - prev_box.lrx < self.paragraph_break:  # type: ignore
                     sub_line.append(ann)
                     sub_line_ann_ids.append(ann.annotation_id)
                 else:
-
-
-
-
-
-
+                    # We need to iterate maybe more than one time, because sub-lines may have more than one line
+                    # if having been split. Take fore example a multi-column layout where a sub-line has
+                    # two lines because of a column break and fonts twice as large as the other column.
+                    detection_results = self.create_detection_result(
+                        sub_line, image_width, image_height, image_id, False
+                    )
+                    if detection_results:
+                        detection_result_list.extend(detection_results)
+                    else:
+                        boxes = [ann.get_bounding_box(image_id) for ann in sub_line]
+                        merge_box = merge_boxes(*boxes)
+                        detection_result = self._make_detect_result(merge_box, {"child": sub_line_ann_ids})
+                        detection_result_list.append(detection_result)
+                    sub_line = [ann]
+                    sub_line_ann_ids = [ann.annotation_id]

             if idx == len(anns_per_row) - 1:
-
-
-
-
+                detection_results = self.create_detection_result(
+                    sub_line, image_width, image_height, image_id, False
+                )
+                if detection_results:
+                    detection_result_list.extend(detection_results)
+                else:
+                    boxes = [ann.get_bounding_box(image_id) for ann in sub_line]
+                    merge_box = merge_boxes(*boxes)
+                    detection_result = self._make_detect_result(merge_box, {"child": sub_line_ann_ids})
+                    detection_result_list.append(detection_result)

         return detection_result_list


+class TextLineServiceMixin(PipelineComponent, ABC):
+    """
+    This class is used to create text lines similar to TextOrderService.
+    It uses the logic of the TextOrderService but modifies it to suit its needs.
+    It specifically uses the _create_lines_for_words method and modifies the serve method.
+    """
+
+    def __init__(
+        self,
+        name: str,
+        line_category_id: int = 1,
+        include_residual_text_container: bool = True,
+        paragraph_break: Optional[float] = None,
+    ):
+        """
+        Initialize the TextLineService with a line_category_id and a TextLineGenerator instance.
+        """
+        self.line_category_id = line_category_id
+        self.include_residual_text_container = include_residual_text_container
+        self.text_line_generator = TextLineGenerator(
+            self.include_residual_text_container, self.line_category_id, paragraph_break
+        )
+        super().__init__(name)
+
+    def _create_lines_for_words(self, word_anns: Sequence[ImageAnnotation]) -> Sequence[ImageAnnotation]:
+        """
+        This method creates lines for words using the TextLineGenerator instance.
+        """
+        detection_result_list = self.text_line_generator.create_detection_result(
+            word_anns,
+            self.dp_manager.datapoint.width,
+            self.dp_manager.datapoint.height,
+            self.dp_manager.datapoint.image_id,
+        )
+        line_anns = []
+        for detect_result in detection_result_list:
+            ann_id = self.dp_manager.set_image_annotation(detect_result)
+            if ann_id:
+                line_ann = self.dp_manager.get_annotation(ann_id)
+                child_ann_id_list = detect_result.relationships["child"]  # type: ignore
+                for child_ann_id in child_ann_id_list:
+                    line_ann.dump_relationship(Relationships.child, child_ann_id)
+                line_anns.append(line_ann)
+        return line_anns
+
+
+class TextLineService(TextLineServiceMixin):
+    """
+    Some OCR systems do not identify lines of text but only provide text boxes for words. This is not sufficient
+    for certain applications. This service determines rule-based text lines based on word boxes. One difficulty is
+    that text lines are not continuous but are interrupted, for example in multi-column layouts.
+    These interruptions are taken into account insofar as the gap between two words on almost the same page height
+    must not be too large.
+
+    The service constructs new ImageAnnotation of the category `LayoutType.line` and forms relations between the
+    text lines and the words contained in the text lines. The reading order is not arranged.
+    """
+
+    def __init__(self, line_category_id: int = 1, paragraph_break: Optional[float] = None):
+        """
+        Initialize `TextLineService`
+
+        :param line_category_id: category_id to give a text line
+        :param paragraph_break: threshold of two consecutive words. If distance is larger than threshold, two sublines
+                                will be built
+        """
+        super().__init__(
+            name="text_line",
+            line_category_id=line_category_id,
+            include_residual_text_container=True,
+            paragraph_break=paragraph_break,
+        )
+
+    def clone(self) -> PipelineComponent:
+        """
+        This method returns a new instance of the class with the same configuration.
+        """
+        return self.__class__(self.line_category_id, self.text_line_generator.paragraph_break)
+
+    def serve(self, dp: Image) -> None:
+        text_container_anns = dp.get_annotation(category_names=LayoutType.word)
+        self._create_lines_for_words(text_container_anns)
+
+    def get_meta_annotation(self) -> JsonDict:
+        """
+        This method returns metadata about the annotations created by this pipeline component.
+        """
+        return dict(
+            [
+                ("image_annotations", [LayoutType.line]),
+                ("sub_categories", {LayoutType.line: {Relationships.child}}),
+                ("relationships", {}),
+                ("summaries", []),
+            ]
+        )
+
+
 @pipeline_component_registry.register("TextOrderService")
-class TextOrderService(PipelineComponent):
+class TextOrderService(TextLineServiceMixin):
     """
     Reading order of words within floating text blocks as well as reading order of blocks within simple text blocks.
     To understand the difference between floating text blocks and simple text blocks consider a page containing an
@@ -470,7 +589,8 @@ class TextOrderService(PipelineComponent):
     A category annotation per word is generated, which fixes the order per word in the block, as well as a category
     annotation per block, which saves the reading order of the block per page.

-    The blocks are defined in `
+    The blocks are defined in `text_block_categories` and text blocks that should be considered when generating
+    narrative text must be added in `floating_text_block_categories`.

         order = TextOrderService(text_container="word",
                                  text_block_categories=["title", "text", "list", "cell",
@@ -533,7 +653,12 @@ class TextOrderService(PipelineComponent):
         self.text_line_generator = TextLineGenerator(
             self.include_residual_text_container, line_category_id, paragraph_break
         )
-        super().__init__(
+        super().__init__(
+            name="text_order",
+            line_category_id=line_category_id,
+            include_residual_text_container=include_residual_text_container,
+            paragraph_break=paragraph_break,
+        )
         self._init_sanity_checks()

     def serve(self, dp: Image) -> None:
@@ -567,24 +692,6 @@ class TextOrderService(PipelineComponent):
                     Relationships.reading_order, idx, Relationships.reading_order, annotation_id
                 )

-    def _create_lines_for_words(self, word_anns: Sequence[ImageAnnotation]) -> Sequence[ImageAnnotation]:
-        detection_result_list = self.text_line_generator.create_detection_result(
-            word_anns,
-            self.dp_manager.datapoint.width,
-            self.dp_manager.datapoint.height,
-            self.dp_manager.datapoint.image_id,
-        )
-        line_anns = []
-        for detect_result in detection_result_list:
-            ann_id = self.dp_manager.set_image_annotation(detect_result)
-            if ann_id:
-                line_ann = self.dp_manager.get_annotation(ann_id)
-                child_ann_id_list = detect_result.relationships["child"]  # type: ignore
-                for child_ann_id in child_ann_id_list:
-                    line_ann.dump_relationship(Relationships.child, child_ann_id)
-                line_anns.append(line_ann)
-        return line_anns
-
     def order_text_in_text_block(self, text_block_ann: ImageAnnotation) -> None:
         """
         Order text within a text block. It will take all child-like text containers (determined by a
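The new `TextLineService` can be used on its own when an OCR detector only returns word boxes. A minimal sketch of wiring it up, using only the signature shown above; the choice of `paragraph_break=0.035` follows the updated docstring, everything else is illustrative:

from deepdoctection.pipe.order import TextLineService

# paragraph_break is compared against the relative-coordinate gap between two
# consecutive words; per the updated docstring, 0.035 is a reasonable value.
text_line_service = TextLineService(line_category_id=1, paragraph_break=0.035)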
deepdoctection/pipe/refine.py
CHANGED
@@ -23,7 +23,7 @@ from collections import defaultdict
 from copy import copy
 from dataclasses import asdict
 from itertools import chain, product
-from typing import DefaultDict, List, Optional, Set, Tuple, Union
+from typing import DefaultDict, List, Optional, Sequence, Set, Tuple, Union

 import networkx as nx  # type: ignore

@@ -33,7 +33,8 @@ from ..datapoint.image import Image
 from ..extern.base import DetectionResult
 from ..mapper.maputils import MappingContextManager
 from ..utils.detection_types import JsonDict
-from ..utils.
+from ..utils.error import AnnotationError, ImageError
+from ..utils.settings import CellType, LayoutType, ObjectTypes, Relationships, TableType, get_type
 from .base import PipelineComponent
 from .registry import pipeline_component_registry

@@ -302,7 +303,7 @@ def generate_html_string(table: ImageAnnotation) -> List[str]:
     :return: HTML representation of the table
     """
     if table.image is None:
-        raise
+        raise ImageError("table.image cannot be None")
     table_image = table.image
     cells = table_image.get_annotation(
         category_names=[
@@ -397,22 +398,16 @@ class TableSegmentationRefinementService(PipelineComponent):

     """

-    def __init__(self) -> None:
-        self.
-        self.
-            LayoutType.cell,
-            CellType.column_header,
-            CellType.projected_row_header,
-            CellType.spanning,
-            CellType.row_header,
-        ]
+    def __init__(self, table_name: Sequence[ObjectTypes], cell_names: Sequence[ObjectTypes]) -> None:
+        self.table_name = table_name
+        self.cell_names = cell_names
         super().__init__("table_segment_refine")

     def serve(self, dp: Image) -> None:
-        tables = dp.get_annotation(category_names=self.
+        tables = dp.get_annotation(category_names=self.table_name)
         for table in tables:
             if table.image is None:
-                raise
+                raise ImageError("table.image cannot be None")
             tiles_to_cells_list = tiles_to_cells(dp, table)
             connected_components, tile_to_cell_dict = connected_component_tiles(tiles_to_cells_list)
             rectangle_tiling = generate_rectangle_tiling(connected_components)
@@ -457,21 +452,28 @@ class TableSegmentationRefinementService(PipelineComponent):
             for cell in cells:
                 cell.deactivate()

-            cells = table.image.get_annotation(category_names=self.
+            cells = table.image.get_annotation(category_names=self.cell_names)
             number_of_rows = max(int(cell.get_sub_category(CellType.row_number).category_id) for cell in cells)
             number_of_cols = max(int(cell.get_sub_category(CellType.column_number).category_id) for cell in cells)
             max_row_span = max(int(cell.get_sub_category(CellType.row_span).category_id) for cell in cells)
             max_col_span = max(int(cell.get_sub_category(CellType.column_span).category_id) for cell in cells)
             # TODO: the summaries should be sub categories of the underlying ann
             if table.image.summary is not None:
-                if
-                    table.
-
-                    table.
-
-
-
-                    table.
+                if (
+                    TableType.number_of_rows in table.image.summary.sub_categories
+                    and TableType.number_of_columns in table.image.summary.sub_categories
+                    and TableType.max_row_span in table.image.summary.sub_categories
+                    and TableType.max_col_span in table.image.summary.sub_categories
+                ):
+                    table.image.summary.remove_sub_category(TableType.number_of_rows)
+                    table.image.summary.remove_sub_category(TableType.number_of_columns)
+                    table.image.summary.remove_sub_category(TableType.max_row_span)
+                    table.image.summary.remove_sub_category(TableType.max_col_span)
+                else:
+                    raise AnnotationError(
+                        "Table summary does not contain sub categories TableType.number_of_rows, "
+                        "TableType.number_of_columns, TableType.max_row_span, TableType.max_col_span"
+                    )

             self.dp_manager.set_summary_annotation(
                 TableType.number_of_rows, TableType.number_of_rows, number_of_rows, annotation_id=table.annotation_id
@@ -492,7 +494,7 @@ class TableSegmentationRefinementService(PipelineComponent):
             self.dp_manager.set_container_annotation(TableType.html, -1, TableType.html, table.annotation_id, html)

     def clone(self) -> PipelineComponent:
-        return self.__class__()
+        return self.__class__(self.table_name, self.cell_names)

     def get_meta_annotation(self) -> JsonDict:
         return dict(
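`TableSegmentationRefinementService` no longer hard-codes its categories: the constructor now requires `table_name` and `cell_names`, and `clone()` passes them through. A minimal sketch of the new call; `LayoutType.table` as the table category is an assumption, while the cell list mirrors the one removed above:

from deepdoctection.pipe.refine import TableSegmentationRefinementService
from deepdoctection.utils.settings import CellType, LayoutType

refinement_service = TableSegmentationRefinementService(
    table_name=[LayoutType.table],  # assumed; any table-like ObjectTypes belong here
    cell_names=[
        LayoutType.cell,
        CellType.column_header,
        CellType.projected_row_header,
        CellType.spanning,
        CellType.row_header,
    ],
)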
deepdoctection/pipe/segment.py
CHANGED
@@ -33,6 +33,7 @@ from ..extern.base import DetectionResult
 from ..mapper.maputils import MappingContextManager
 from ..mapper.match import match_anns_by_intersection
 from ..utils.detection_types import JsonDict
+from ..utils.error import ImageError
 from ..utils.settings import CellType, LayoutType, ObjectTypes, Relationships, TableType
 from .base import PipelineComponent
 from .refine import generate_html_string
@@ -136,12 +137,12 @@ def stretch_item_per_table(

     rows = dp.get_annotation(category_names=row_name, annotation_ids=item_ann_ids)
     if table.image is None:
-        raise
+        raise ImageError("table.image cannot be None")
     table_embedding_box = table.get_bounding_box(dp.image_id)

     for row in rows:
         if row.image is None:
-            raise
+            raise ImageError("row.image cannot be None")
         row_embedding_box = row.get_bounding_box(dp.image_id)
         row_embedding_box.ulx = table_embedding_box.ulx + 1.0
         row_embedding_box.lrx = table_embedding_box.lrx - 1.0
@@ -166,7 +167,7 @@ def stretch_item_per_table(

     for col in cols:
         if col.image is None:
-            raise
+            raise ImageError("row.image cannot be None")
         col_embedding_box = col.get_bounding_box(dp.image_id)
         col_embedding_box.uly = table_embedding_box.uly + 1.0
         col_embedding_box.lry = table_embedding_box.lry - 1.0
@@ -194,7 +195,7 @@ def _tile_by_stretching_rows_left_and_rightwise(
     dp: Image, items: List[ImageAnnotation], table: ImageAnnotation, item_name: str
 ) -> None:
     if table.image is None:
-        raise
+        raise ImageError("table.image cannot be None")
     table_embedding_box = table.get_bounding_box(dp.image_id)

     tmp_item_xy = table_embedding_box.uly + 1.0 if item_name == LayoutType.row else table_embedding_box.ulx + 1.0
@@ -206,7 +207,7 @@ def _tile_by_stretching_rows_left_and_rightwise(
             image_annotation={"category_name": item.category_name, "annotation_id": item.annotation_id},
         ):
             if item.image is None:
-                raise
+                raise ImageError("item.image cannot be None")
             item_embedding_box = item.get_bounding_box(dp.image_id)
             if idx != len(items) - 1:
                 next_item_embedding_box = items[idx + 1].get_bounding_box(dp.image_id)
@@ -258,7 +259,7 @@ def _tile_by_stretching_rows_leftwise_column_downwise(
     dp: Image, items: List[ImageAnnotation], table: ImageAnnotation, item_name: str
 ) -> None:
     if table.image is None:
-        raise
+        raise ImageError("table.image cannot be None")
     table_embedding_box = table.get_bounding_box(dp.image_id)

     tmp_item_xy = table_embedding_box.uly + 1.0 if item_name == LayoutType.row else table_embedding_box.ulx + 1.0
@@ -270,7 +271,7 @@ def _tile_by_stretching_rows_leftwise_column_downwise(
             image_annotation={"category_name": item.category_name, "annotation_id": item.annotation_id},
         ):
             if item.image is None:
-                raise
+                raise ImageError("item.image cannot be None")
             item_embedding_box = item.get_bounding_box(dp.image_id)
             new_embedding_box = BoundingBox(
                 ulx=item_embedding_box.ulx if item_name == LayoutType.row else tmp_item_xy,
@@ -339,9 +340,9 @@ def tile_tables_with_items_per_table(
     items = dp.get_annotation(category_names=item_name, annotation_ids=item_ann_ids)

     items.sort(
-        key=lambda x:
-
-
+        key=lambda x: (
+            x.get_bounding_box(dp.image_id).cx if item_name == LayoutType.column else x.get_bounding_box(dp.image_id).cy
+        )
     )

     if stretch_rule == "left":
@@ -737,9 +738,11 @@ class TableSegmentationService(PipelineComponent):

         # we will assume that either all or no image attribute has been generated
         items.sort(
-            key=lambda x:
-
-
+            key=lambda x: (
+                x.get_bounding_box(dp.image_id).cx  # pylint: disable=W0640
+                if item_name == LayoutType.column  # pylint: disable=W0640
+                else x.get_bounding_box(dp.image_id).cy  # pylint: disable=W0640
+            )
         )

         for item_number, item in enumerate(items, 1):
@@ -939,9 +942,11 @@ class PubtablesSegmentationService(PipelineComponent):

         # we will assume that either all or no image attribute has been generated
         items.sort(
-            key=lambda x:
-
-
+            key=lambda x: (
+                x.get_bounding_box(dp.image_id).cx
+                if item_name == LayoutType.column  # pylint: disable=W0640
+                else x.get_bounding_box(dp.image_id).cy
+            )
         )

         for item_number, item in enumerate(items, 1):

deepdoctection/pipe/{cell.py → sub_layout.py}
RENAMED
@@ -1,5 +1,5 @@
 # -*- coding: utf-8 -*-
-# File:
+# File: sub_layout.py

 # Copyright 2021 Dr. Janis Meyer. All rights reserved.
 #
@@ -24,9 +24,11 @@ from typing import Dict, List, Mapping, Optional, Sequence, Union

 import numpy as np

+from ..datapoint.annotation import ImageAnnotation
+from ..datapoint.box import crop_box_from_image
 from ..datapoint.image import Image
 from ..extern.base import DetectionResult, ObjectDetector, PdfMiner
-from ..utils.detection_types import JsonDict
+from ..utils.detection_types import ImageType, JsonDict
 from ..utils.settings import ObjectTypes, Relationships
 from ..utils.transform import PadTransform
 from .base import PredictorPipelineComponent
@@ -181,18 +183,14 @@ class SubImageLayoutService(PredictorPipelineComponent):
         """
         sub_image_anns = dp.get_annotation_iter(category_names=self.sub_image_name)
         for sub_image_ann in sub_image_anns:
-
-
-            np_image = sub_image_ann.image.image
-            if self.padder:
-                np_image = self.padder.apply_image(np_image)
-            detect_result_list = self.predictor.predict(np_image)
+            np_image = self.prepare_np_image(sub_image_ann)
+            detect_result_list = self.predictor.predict(np_image)  # type: ignore
             if self.padder and detect_result_list:
                 boxes = np.array([detect_result.box for detect_result in detect_result_list])
                 boxes_orig = self.padder.inverse_apply_coords(boxes)
                 for idx, detect_result in enumerate(detect_result_list):
                     detect_result.box = boxes_orig[idx, :].tolist()
-            if self.detect_result_generator:
+            if self.detect_result_generator and sub_image_ann.image:
                 self.detect_result_generator.width = sub_image_ann.image.width
                 self.detect_result_generator.height = sub_image_ann.image.height
                 detect_result_list = self.detect_result_generator.create_detection_result(detect_result_list)
@@ -235,3 +233,26 @@ class SubImageLayoutService(PredictorPipelineComponent):
             deepcopy(self.detect_result_generator),
             padder_clone,
         )
+
+    def prepare_np_image(self, sub_image_ann: ImageAnnotation) -> ImageType:
+        """Maybe crop and pad a np_array before passing it to the predictor.
+
+        Note that we currently assume to a two level hierachy of images, e.g. we can crop a sub-image from the base
+        image, e.g. the original input but we cannot crop a sub-image from an image which is itself a sub-image.
+
+        :param sub_image_ann: ImageAnnotation to be processed
+        :return: processed np_image
+        """
+        if sub_image_ann.image is None:
+            raise ValueError("sub_image_ann.image is None, but must be an datapoint.Image")
+        np_image = sub_image_ann.image.image
+        if np_image is None and self.dp_manager.datapoint.image is not None:
+            np_image = crop_box_from_image(
+                self.dp_manager.datapoint.image,
+                sub_image_ann.get_bounding_box(self.dp_manager.datapoint.image_id),
+                self.dp_manager.datapoint.width,
+                self.dp_manager.datapoint.height,
+            )
+        if self.padder:
+            np_image = self.padder.apply_image(np_image)
+        return np_image
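`serve` now delegates cropping and padding to `prepare_np_image`, while the pad/predict/unpad round trip stays inline. A minimal sketch of that round trip, using only the calls visible in the diff (`apply_image`, `predict`, `inverse_apply_coords`); `predictor` and `padder` are stand-ins for any `ObjectDetector` and `PadTransform`:

import numpy as np

def predict_on_padded_image(np_image, predictor, padder):
    """Pad the sub-image, run the detector, then map boxes back to unpadded coordinates."""
    padded = padder.apply_image(np_image)
    detect_result_list = predictor.predict(padded)
    if detect_result_list:
        boxes = np.array([r.box for r in detect_result_list])
        boxes_orig = padder.inverse_apply_coords(boxes)
        for idx, detect_result in enumerate(detect_result_list):
            detect_result.box = boxes_orig[idx, :].tolist()
    return detect_result_list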
deepdoctection/pipe/text.py
CHANGED
@@ -26,6 +26,7 @@ from ..datapoint.image import Image
 from ..extern.base import ObjectDetector, PdfMiner, TextRecognizer
 from ..extern.tessocr import TesseractOcrDetector
 from ..utils.detection_types import ImageType, JsonDict
+from ..utils.error import ImageError
 from ..utils.settings import PageType, TypeOrStr, WordType, get_type
 from .base import PredictorPipelineComponent
 from .registry import pipeline_component_registry
@@ -89,7 +90,10 @@ class TextExtractionService(PredictorPipelineComponent):
         super().__init__(self._get_name(text_extract_detector.name), text_extract_detector)
         if self.extract_from_category:
             if not isinstance(self.predictor, (ObjectDetector, TextRecognizer)):
-                raise TypeError(
+                raise TypeError(
+                    f"Predicting from a cropped image requires to pass an ObjectDetector or "
+                    f"TextRecognizer. Got {type(self.predictor)}"
+                )
         if run_time_ocr_language_selection:
             assert isinstance(
                 self.predictor, TesseractOcrDetector
@@ -171,13 +175,13 @@ class TextExtractionService(PredictorPipelineComponent):

         if isinstance(text_roi, ImageAnnotation):
             if text_roi.image is None:
-                raise
+                raise ImageError("text_roi.image cannot be None")
             if text_roi.image.image is None:
-                raise
+                raise ImageError("text_roi.image.image cannot be None")
             return text_roi.image.image
         if isinstance(self.predictor, ObjectDetector):
             if not isinstance(text_roi, Image):
-                raise
+                raise ImageError("text_roi must be an image")
             return text_roi.image
         if isinstance(text_roi, list):
             assert all(roi.image is not None for roi in text_roi)
@@ -201,9 +205,11 @@ class TextExtractionService(PredictorPipelineComponent):
             [
                 (
                     "image_annotations",
-
-
-
+                    (
+                        self.predictor.possible_categories()
+                        if isinstance(self.predictor, (ObjectDetector, PdfMiner))
+                        else []
+                    ),
                 ),
                 ("sub_categories", sub_cat_dict),
                 ("relationships", {}),
@@ -218,5 +224,5 @@ class TextExtractionService(PredictorPipelineComponent):
     def clone(self) -> "PredictorPipelineComponent":
         predictor = self.predictor.clone()
         if not isinstance(predictor, (ObjectDetector, PdfMiner, TextRecognizer)):
-            raise
+            raise ImageError(f"predictor must be of type ObjectDetector or PdfMiner, but is of type {type(predictor)}")
         return self.__class__(predictor, deepcopy(self.extract_from_category), self.run_time_ocr_language_selection)
deepdoctection/pipe/transform.py
CHANGED
@@ -23,7 +23,6 @@ on images (e.g. deskew, de-noising or more general GAN like operations.
 from ..datapoint.image import Image
 from ..extern.base import ImageTransformer
 from ..utils.detection_types import JsonDict
-from ..utils.logger import LoggingRecord, logger
 from .base import ImageTransformPipelineComponent
 from .registry import pipeline_component_registry

@@ -49,16 +48,24 @@ class SimpleTransformService(ImageTransformPipelineComponent):

     def serve(self, dp: Image) -> None:
         if dp.annotations:
-
-
-
-
-            )
+            raise RuntimeError(
+                "SimpleTransformService receives datapoints with ÌmageAnnotations. This violates the "
+                "pipeline building API but this can currently be catched only at runtime. "
+                "Please make sure that this component is the first one in the pipeline."
             )
+
         if dp.image is not None:
-
+            detection_result = self.transform_predictor.predict(dp.image)
+            transformed_image = self.transform_predictor.transform(dp.image, detection_result)
             self.dp_manager.datapoint.clear_image(True)
-            self.dp_manager.datapoint.image =
+            self.dp_manager.datapoint.image = transformed_image
+            self.dp_manager.set_summary_annotation(
+                summary_key=self.transform_predictor.possible_category(),
+                summary_name=self.transform_predictor.possible_category(),
+                summary_number=None,
+                summary_value=getattr(detection_result, self.transform_predictor.possible_category().value, None),
+                summary_score=detection_result.score,
+            )

     def clone(self) -> "SimpleTransformService":
         return self.__class__(self.transform_predictor)
@@ -69,7 +76,7 @@ class SimpleTransformService(ImageTransformPipelineComponent):
                 ("image_annotations", []),
                 ("sub_categories", {}),
                 ("relationships", {}),
-                ("summaries", []),
+                ("summaries", [self.transform_predictor.possible_category()]),
             ]
         )
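`SimpleTransformService` now records the transformer's `possible_category()` (for a deskewer, the detected angle) as a summary annotation and reports it via `get_meta_annotation()`. A minimal sketch of instantiating the component; `Jdeskewer` is taken from `extern/deskew.py` in this diff, but its constructor arguments are not shown here, so the no-argument call is an assumption:

from deepdoctection.extern.deskew import Jdeskewer
from deepdoctection.pipe.transform import SimpleTransformService

# After serve() runs, the deskew angle is kept as a summary annotation on the
# datapoint instead of being silently discarded.
deskew_service = SimpleTransformService(Jdeskewer())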
deepdoctection/train/__init__.py
CHANGED
@@ -19,20 +19,14 @@
 Init module for train package
 """

-from ..utils.file_utils import (
-    detectron2_available,
-    pytorch_available,
-    tensorpack_available,
-    tf_available,
-    transformers_available,
-)
+from ..utils.file_utils import detectron2_available, tensorpack_available, transformers_available

-if
-    from .tp_frcnn_train import train_faster_rcnn
-
-if pytorch_available() and detectron2_available():
+if detectron2_available():
     from .d2_frcnn_train import train_d2_faster_rcnn

-if
+if transformers_available():
     from .hf_detr_train import train_hf_detr
     from .hf_layoutlm_train import train_hf_layoutlm
+
+if tensorpack_available():
+    from .tp_frcnn_train import train_faster_rcnn