deepdoctection 0.30-py3-none-any.whl → 0.32-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (120)
  1. deepdoctection/__init__.py +38 -29
  2. deepdoctection/analyzer/dd.py +36 -29
  3. deepdoctection/configs/conf_dd_one.yaml +34 -31
  4. deepdoctection/dataflow/base.py +0 -19
  5. deepdoctection/dataflow/custom.py +4 -3
  6. deepdoctection/dataflow/custom_serialize.py +14 -5
  7. deepdoctection/dataflow/parallel_map.py +12 -11
  8. deepdoctection/dataflow/serialize.py +5 -4
  9. deepdoctection/datapoint/annotation.py +35 -13
  10. deepdoctection/datapoint/box.py +3 -5
  11. deepdoctection/datapoint/convert.py +3 -1
  12. deepdoctection/datapoint/image.py +79 -36
  13. deepdoctection/datapoint/view.py +152 -49
  14. deepdoctection/datasets/__init__.py +1 -4
  15. deepdoctection/datasets/adapter.py +6 -3
  16. deepdoctection/datasets/base.py +86 -11
  17. deepdoctection/datasets/dataflow_builder.py +1 -1
  18. deepdoctection/datasets/info.py +4 -4
  19. deepdoctection/datasets/instances/doclaynet.py +3 -2
  20. deepdoctection/datasets/instances/fintabnet.py +2 -1
  21. deepdoctection/datasets/instances/funsd.py +2 -1
  22. deepdoctection/datasets/instances/iiitar13k.py +5 -2
  23. deepdoctection/datasets/instances/layouttest.py +4 -8
  24. deepdoctection/datasets/instances/publaynet.py +2 -2
  25. deepdoctection/datasets/instances/pubtables1m.py +6 -3
  26. deepdoctection/datasets/instances/pubtabnet.py +2 -1
  27. deepdoctection/datasets/instances/rvlcdip.py +2 -1
  28. deepdoctection/datasets/instances/xfund.py +2 -1
  29. deepdoctection/eval/__init__.py +1 -4
  30. deepdoctection/eval/accmetric.py +1 -1
  31. deepdoctection/eval/base.py +5 -4
  32. deepdoctection/eval/cocometric.py +2 -1
  33. deepdoctection/eval/eval.py +19 -15
  34. deepdoctection/eval/tedsmetric.py +14 -11
  35. deepdoctection/eval/tp_eval_callback.py +14 -7
  36. deepdoctection/extern/__init__.py +2 -7
  37. deepdoctection/extern/base.py +39 -13
  38. deepdoctection/extern/d2detect.py +182 -90
  39. deepdoctection/extern/deskew.py +36 -9
  40. deepdoctection/extern/doctrocr.py +265 -83
  41. deepdoctection/extern/fastlang.py +49 -9
  42. deepdoctection/extern/hfdetr.py +106 -55
  43. deepdoctection/extern/hflayoutlm.py +441 -122
  44. deepdoctection/extern/hflm.py +225 -0
  45. deepdoctection/extern/model.py +56 -47
  46. deepdoctection/extern/pdftext.py +10 -5
  47. deepdoctection/extern/pt/__init__.py +1 -3
  48. deepdoctection/extern/pt/nms.py +6 -2
  49. deepdoctection/extern/pt/ptutils.py +27 -18
  50. deepdoctection/extern/tessocr.py +134 -22
  51. deepdoctection/extern/texocr.py +6 -2
  52. deepdoctection/extern/tp/tfutils.py +43 -9
  53. deepdoctection/extern/tp/tpcompat.py +14 -11
  54. deepdoctection/extern/tp/tpfrcnn/__init__.py +20 -0
  55. deepdoctection/extern/tp/tpfrcnn/common.py +7 -3
  56. deepdoctection/extern/tp/tpfrcnn/config/__init__.py +20 -0
  57. deepdoctection/extern/tp/tpfrcnn/config/config.py +9 -6
  58. deepdoctection/extern/tp/tpfrcnn/modeling/__init__.py +20 -0
  59. deepdoctection/extern/tp/tpfrcnn/modeling/backbone.py +17 -7
  60. deepdoctection/extern/tp/tpfrcnn/modeling/generalized_rcnn.py +12 -6
  61. deepdoctection/extern/tp/tpfrcnn/modeling/model_box.py +9 -4
  62. deepdoctection/extern/tp/tpfrcnn/modeling/model_cascade.py +8 -5
  63. deepdoctection/extern/tp/tpfrcnn/modeling/model_fpn.py +16 -11
  64. deepdoctection/extern/tp/tpfrcnn/modeling/model_frcnn.py +17 -10
  65. deepdoctection/extern/tp/tpfrcnn/modeling/model_mrcnn.py +14 -8
  66. deepdoctection/extern/tp/tpfrcnn/modeling/model_rpn.py +15 -10
  67. deepdoctection/extern/tp/tpfrcnn/predict.py +9 -4
  68. deepdoctection/extern/tp/tpfrcnn/preproc.py +8 -9
  69. deepdoctection/extern/tp/tpfrcnn/utils/__init__.py +20 -0
  70. deepdoctection/extern/tp/tpfrcnn/utils/box_ops.py +10 -2
  71. deepdoctection/extern/tpdetect.py +54 -30
  72. deepdoctection/mapper/__init__.py +3 -8
  73. deepdoctection/mapper/d2struct.py +9 -7
  74. deepdoctection/mapper/hfstruct.py +7 -2
  75. deepdoctection/mapper/laylmstruct.py +164 -21
  76. deepdoctection/mapper/maputils.py +16 -3
  77. deepdoctection/mapper/misc.py +6 -3
  78. deepdoctection/mapper/prodigystruct.py +1 -1
  79. deepdoctection/mapper/pubstruct.py +10 -10
  80. deepdoctection/mapper/tpstruct.py +3 -3
  81. deepdoctection/pipe/__init__.py +1 -1
  82. deepdoctection/pipe/anngen.py +35 -8
  83. deepdoctection/pipe/base.py +53 -19
  84. deepdoctection/pipe/common.py +23 -13
  85. deepdoctection/pipe/concurrency.py +2 -1
  86. deepdoctection/pipe/doctectionpipe.py +2 -2
  87. deepdoctection/pipe/language.py +3 -2
  88. deepdoctection/pipe/layout.py +6 -3
  89. deepdoctection/pipe/lm.py +34 -66
  90. deepdoctection/pipe/order.py +142 -35
  91. deepdoctection/pipe/refine.py +26 -24
  92. deepdoctection/pipe/segment.py +21 -16
  93. deepdoctection/pipe/{cell.py → sub_layout.py} +30 -9
  94. deepdoctection/pipe/text.py +14 -8
  95. deepdoctection/pipe/transform.py +16 -9
  96. deepdoctection/train/__init__.py +6 -12
  97. deepdoctection/train/d2_frcnn_train.py +36 -28
  98. deepdoctection/train/hf_detr_train.py +26 -17
  99. deepdoctection/train/hf_layoutlm_train.py +133 -111
  100. deepdoctection/train/tp_frcnn_train.py +21 -19
  101. deepdoctection/utils/__init__.py +3 -0
  102. deepdoctection/utils/concurrency.py +1 -1
  103. deepdoctection/utils/context.py +2 -2
  104. deepdoctection/utils/env_info.py +41 -84
  105. deepdoctection/utils/error.py +84 -0
  106. deepdoctection/utils/file_utils.py +4 -15
  107. deepdoctection/utils/fs.py +7 -7
  108. deepdoctection/utils/logger.py +1 -0
  109. deepdoctection/utils/mocks.py +93 -0
  110. deepdoctection/utils/pdf_utils.py +5 -4
  111. deepdoctection/utils/settings.py +6 -1
  112. deepdoctection/utils/transform.py +1 -1
  113. deepdoctection/utils/utils.py +0 -6
  114. deepdoctection/utils/viz.py +48 -5
  115. {deepdoctection-0.30.dist-info → deepdoctection-0.32.dist-info}/METADATA +57 -73
  116. deepdoctection-0.32.dist-info/RECORD +146 -0
  117. {deepdoctection-0.30.dist-info → deepdoctection-0.32.dist-info}/WHEEL +1 -1
  118. deepdoctection-0.30.dist-info/RECORD +0 -143
  119. {deepdoctection-0.30.dist-info → deepdoctection-0.32.dist-info}/LICENSE +0 -0
  120. {deepdoctection-0.30.dist-info → deepdoctection-0.32.dist-info}/top_level.txt +0 -0
--- a/deepdoctection/pipe/order.py
+++ b/deepdoctection/pipe/order.py
@@ -18,7 +18,10 @@
 """
 Module for ordering text and layout segments pipeline components
 """
+from __future__ import annotations
+
 import os
+from abc import ABC
 from copy import copy
 from itertools import chain
 from logging import DEBUG
@@ -349,10 +352,11 @@ class TextLineGenerator:
         self, make_sub_lines: bool, line_category_id: Union[int, str], paragraph_break: Optional[float] = None
     ):
         """
-        :param make_sub_lines: Whether to build sub lines from lines
+        :param make_sub_lines: Whether to build sub lines from lines.
         :param line_category_id: category_id to give a text line
-        :param paragraph_break: threshold of two consecutive words. If distance is larger than threshold, two sublines
-                                will be built
+        :param paragraph_break: threshold of two consecutive words. If distance is larger than threshold, two sub-lines
+                                will be built. We use relative coordinates to calculate the distance between two
+                                consecutive words. A reasonable value is 0.035
         """
         if make_sub_lines and paragraph_break is None:
             raise ValueError("You must specify paragraph_break when setting make_sub_lines to True")
@@ -375,6 +379,7 @@ class TextLineGenerator:
         image_width: float,
         image_height: float,
         image_id: Optional[str] = None,
+        highest_level: bool = True,
     ) -> Sequence[DetectionResult]:
         """
         Creating detecting result of lines (or sub lines) from given word type `ImageAnnotation`.
@@ -392,6 +397,8 @@
         # list of (word index, text line, word annotation_id)
         word_order_list = OrderGenerator.group_words_into_lines(word_anns, image_id)
         number_rows = max(word[1] for word in word_order_list)
+        if number_rows == 1 and not highest_level:
+            return []
         detection_result_list = []
         for number_row in range(1, number_rows + 1):
             # list of (word index, text line, word annotation_id) for text line equal to number_row
@@ -423,29 +430,141 @@ class TextLineGenerator:
                 if current_box.absolute_coords:
                     current_box = current_box.transform(image_width, image_height)

-                # If distance between boxes is lower than paragraph break, same sub line
+                # If distance between boxes is lower than paragraph break, same sub-line
                 if current_box.ulx - prev_box.lrx < self.paragraph_break:  # type: ignore
                     sub_line.append(ann)
                     sub_line_ann_ids.append(ann.annotation_id)
                 else:
-                    boxes = [ann.get_bounding_box(image_id) for ann in sub_line]
-                    merge_box = merge_boxes(*boxes)
-                    detection_result = self._make_detect_result(merge_box, {"child": sub_line_ann_ids})
-                    detection_result_list.append(detection_result)
-                    sub_line = [ann]
-                    sub_line_ann_ids = [ann.annotation_id]
+                    # We need to iterate maybe more than one time, because sub-lines may have more than one line
+                    # if having been split. Take fore example a multi-column layout where a sub-line has
+                    # two lines because of a column break and fonts twice as large as the other column.
+                    detection_results = self.create_detection_result(
+                        sub_line, image_width, image_height, image_id, False
+                    )
+                    if detection_results:
+                        detection_result_list.extend(detection_results)
+                    else:
+                        boxes = [ann.get_bounding_box(image_id) for ann in sub_line]
+                        merge_box = merge_boxes(*boxes)
+                        detection_result = self._make_detect_result(merge_box, {"child": sub_line_ann_ids})
+                        detection_result_list.append(detection_result)
+                    sub_line = [ann]
+                    sub_line_ann_ids = [ann.annotation_id]

                 if idx == len(anns_per_row) - 1:
-                    boxes = [ann.get_bounding_box(image_id) for ann in sub_line]
-                    merge_box = merge_boxes(*boxes)
-                    detection_result = self._make_detect_result(merge_box, {"child": sub_line_ann_ids})
-                    detection_result_list.append(detection_result)
+                    detection_results = self.create_detection_result(
+                        sub_line, image_width, image_height, image_id, False
+                    )
+                    if detection_results:
+                        detection_result_list.extend(detection_results)
+                    else:
+                        boxes = [ann.get_bounding_box(image_id) for ann in sub_line]
+                        merge_box = merge_boxes(*boxes)
+                        detection_result = self._make_detect_result(merge_box, {"child": sub_line_ann_ids})
+                        detection_result_list.append(detection_result)

         return detection_result_list


+class TextLineServiceMixin(PipelineComponent, ABC):
+    """
+    This class is used to create text lines similar to TextOrderService.
+    It uses the logic of the TextOrderService but modifies it to suit its needs.
+    It specifically uses the _create_lines_for_words method and modifies the serve method.
+    """
+
+    def __init__(
+        self,
+        name: str,
+        line_category_id: int = 1,
+        include_residual_text_container: bool = True,
+        paragraph_break: Optional[float] = None,
+    ):
+        """
+        Initialize the TextLineService with a line_category_id and a TextLineGenerator instance.
+        """
+        self.line_category_id = line_category_id
+        self.include_residual_text_container = include_residual_text_container
+        self.text_line_generator = TextLineGenerator(
+            self.include_residual_text_container, self.line_category_id, paragraph_break
+        )
+        super().__init__(name)
+
+    def _create_lines_for_words(self, word_anns: Sequence[ImageAnnotation]) -> Sequence[ImageAnnotation]:
+        """
+        This method creates lines for words using the TextLineGenerator instance.
+        """
+        detection_result_list = self.text_line_generator.create_detection_result(
+            word_anns,
+            self.dp_manager.datapoint.width,
+            self.dp_manager.datapoint.height,
+            self.dp_manager.datapoint.image_id,
+        )
+        line_anns = []
+        for detect_result in detection_result_list:
+            ann_id = self.dp_manager.set_image_annotation(detect_result)
+            if ann_id:
+                line_ann = self.dp_manager.get_annotation(ann_id)
+                child_ann_id_list = detect_result.relationships["child"]  # type: ignore
+                for child_ann_id in child_ann_id_list:
+                    line_ann.dump_relationship(Relationships.child, child_ann_id)
+                line_anns.append(line_ann)
+        return line_anns
+
+
+class TextLineService(TextLineServiceMixin):
+    """
+    Some OCR systems do not identify lines of text but only provide text boxes for words. This is not sufficient
+    for certain applications. This service determines rule-based text lines based on word boxes. One difficulty is
+    that text lines are not continuous but are interrupted, for example in multi-column layouts.
+    These interruptions are taken into account insofar as the gap between two words on almost the same page height
+    must not be too large.
+
+    The service constructs new ImageAnnotation of the category `LayoutType.line` and forms relations between the
+    text lines and the words contained in the text lines. The reading order is not arranged.
+    """
+
+    def __init__(self, line_category_id: int = 1, paragraph_break: Optional[float] = None):
+        """
+        Initialize `TextLineService`
+
+        :param line_category_id: category_id to give a text line
+        :param paragraph_break: threshold of two consecutive words. If distance is larger than threshold, two sublines
+                                will be built
+        """
+        super().__init__(
+            name="text_line",
+            line_category_id=line_category_id,
+            include_residual_text_container=True,
+            paragraph_break=paragraph_break,
+        )
+
+    def clone(self) -> PipelineComponent:
+        """
+        This method returns a new instance of the class with the same configuration.
+        """
+        return self.__class__(self.line_category_id, self.text_line_generator.paragraph_break)
+
+    def serve(self, dp: Image) -> None:
+        text_container_anns = dp.get_annotation(category_names=LayoutType.word)
+        self._create_lines_for_words(text_container_anns)
+
+    def get_meta_annotation(self) -> JsonDict:
+        """
+        This method returns metadata about the annotations created by this pipeline component.
+        """
+        return dict(
+            [
+                ("image_annotations", [LayoutType.line]),
+                ("sub_categories", {LayoutType.line: {Relationships.child}}),
+                ("relationships", {}),
+                ("summaries", []),
+            ]
+        )
+
+
 @pipeline_component_registry.register("TextOrderService")
-class TextOrderService(PipelineComponent):
+class TextOrderService(TextLineServiceMixin):
     """
     Reading order of words within floating text blocks as well as reading order of blocks within simple text blocks.
     To understand the difference between floating text blocks and simple text blocks consider a page containing an
@@ -470,7 +589,8 @@ class TextOrderService(PipelineComponent):
     A category annotation per word is generated, which fixes the order per word in the block, as well as a category
     annotation per block, which saves the reading order of the block per page.

-    The blocks are defined in `_floating_text_block_names` and text blocks in `_floating_text_block_names`.
+    The blocks are defined in `text_block_categories` and text blocks that should be considered when generating
+    narrative text must be added in `floating_text_block_categories`.

         order = TextOrderService(text_container="word",
                                  text_block_categories=["title", "text", "list", "cell",
@@ -533,7 +653,12 @@ class TextOrderService(PipelineComponent):
         self.text_line_generator = TextLineGenerator(
             self.include_residual_text_container, line_category_id, paragraph_break
         )
-        super().__init__("text_order")
+        super().__init__(
+            name="text_order",
+            line_category_id=line_category_id,
+            include_residual_text_container=include_residual_text_container,
+            paragraph_break=paragraph_break,
+        )
         self._init_sanity_checks()

     def serve(self, dp: Image) -> None:
@@ -567,24 +692,6 @@ class TextOrderService(PipelineComponent):
                 Relationships.reading_order, idx, Relationships.reading_order, annotation_id
             )

-    def _create_lines_for_words(self, word_anns: Sequence[ImageAnnotation]) -> Sequence[ImageAnnotation]:
-        detection_result_list = self.text_line_generator.create_detection_result(
-            word_anns,
-            self.dp_manager.datapoint.width,
-            self.dp_manager.datapoint.height,
-            self.dp_manager.datapoint.image_id,
-        )
-        line_anns = []
-        for detect_result in detection_result_list:
-            ann_id = self.dp_manager.set_image_annotation(detect_result)
-            if ann_id:
-                line_ann = self.dp_manager.get_annotation(ann_id)
-                child_ann_id_list = detect_result.relationships["child"]  # type: ignore
-                for child_ann_id in child_ann_id_list:
-                    line_ann.dump_relationship(Relationships.child, child_ann_id)
-                line_anns.append(line_ann)
-        return line_anns
-
     def order_text_in_text_block(self, text_block_ann: ImageAnnotation) -> None:
         """
         Order text within a text block. It will take all child-like text containers (determined by a
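Taken together, the hunks above split line building (`TextLineService`) from reading-order assignment (`TextOrderService`), with the shared plumbing in `TextLineServiceMixin`. A rough usage sketch follows; the constructor arguments are taken from the signatures and the docstring example shown above, while the surrounding pipeline wiring is assumed and not part of this diff:

from deepdoctection.pipe.order import TextLineService, TextOrderService

# Builds LayoutType.line annotations from word boxes; no reading order is assigned.
# 0.035 is the paragraph_break value suggested in the TextLineGenerator docstring.
text_lines = TextLineService(line_category_id=1, paragraph_break=0.035)

# Assigns reading_order sub-categories per word and per block; now shares the mixin.
order = TextOrderService(
    text_container="word",
    text_block_categories=["title", "text", "list", "cell"],
    floating_text_block_categories=["title", "text", "list"],
)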
--- a/deepdoctection/pipe/refine.py
+++ b/deepdoctection/pipe/refine.py
@@ -23,7 +23,7 @@ from collections import defaultdict
 from copy import copy
 from dataclasses import asdict
 from itertools import chain, product
-from typing import DefaultDict, List, Optional, Set, Tuple, Union
+from typing import DefaultDict, List, Optional, Sequence, Set, Tuple, Union

 import networkx as nx  # type: ignore

@@ -33,7 +33,8 @@ from ..datapoint.image import Image
 from ..extern.base import DetectionResult
 from ..mapper.maputils import MappingContextManager
 from ..utils.detection_types import JsonDict
-from ..utils.settings import CellType, LayoutType, Relationships, TableType, get_type
+from ..utils.error import AnnotationError, ImageError
+from ..utils.settings import CellType, LayoutType, ObjectTypes, Relationships, TableType, get_type
 from .base import PipelineComponent
 from .registry import pipeline_component_registry

@@ -302,7 +303,7 @@ def generate_html_string(table: ImageAnnotation) -> List[str]:
     :return: HTML representation of the table
     """
     if table.image is None:
-        raise ValueError("table.image cannot be None")
+        raise ImageError("table.image cannot be None")
     table_image = table.image
     cells = table_image.get_annotation(
         category_names=[
@@ -397,22 +398,16 @@ class TableSegmentationRefinementService(PipelineComponent):

     """

-    def __init__(self) -> None:
-        self._table_name = [LayoutType.table, LayoutType.table_rotated]
-        self._cell_names = [
-            LayoutType.cell,
-            CellType.column_header,
-            CellType.projected_row_header,
-            CellType.spanning,
-            CellType.row_header,
-        ]
+    def __init__(self, table_name: Sequence[ObjectTypes], cell_names: Sequence[ObjectTypes]) -> None:
+        self.table_name = table_name
+        self.cell_names = cell_names
         super().__init__("table_segment_refine")

     def serve(self, dp: Image) -> None:
-        tables = dp.get_annotation(category_names=self._table_name)
+        tables = dp.get_annotation(category_names=self.table_name)
         for table in tables:
             if table.image is None:
-                raise ValueError("table.image cannot be None")
+                raise ImageError("table.image cannot be None")
             tiles_to_cells_list = tiles_to_cells(dp, table)
             connected_components, tile_to_cell_dict = connected_component_tiles(tiles_to_cells_list)
             rectangle_tiling = generate_rectangle_tiling(connected_components)
@@ -457,21 +452,28 @@
             for cell in cells:
                 cell.deactivate()

-            cells = table.image.get_annotation(category_names=self._cell_names)
+            cells = table.image.get_annotation(category_names=self.cell_names)
             number_of_rows = max(int(cell.get_sub_category(CellType.row_number).category_id) for cell in cells)
             number_of_cols = max(int(cell.get_sub_category(CellType.column_number).category_id) for cell in cells)
             max_row_span = max(int(cell.get_sub_category(CellType.row_span).category_id) for cell in cells)
             max_col_span = max(int(cell.get_sub_category(CellType.column_span).category_id) for cell in cells)
             # TODO: the summaries should be sub categories of the underlying ann
             if table.image.summary is not None:
-                if TableType.number_of_rows in table.image.summary.sub_categories:
-                    table.get_summary(TableType.number_of_rows)
-                if TableType.number_of_columns in table.image.summary.sub_categories:
-                    table.get_summary(TableType.number_of_columns)
-                if TableType.max_row_span in table.image.summary.sub_categories:
-                    table.get_summary(TableType.max_row_span)
-                if TableType.max_col_span in table.image.summary.sub_categories:
-                    table.get_summary(TableType.max_col_span)
+                if (
+                    TableType.number_of_rows in table.image.summary.sub_categories
+                    and TableType.number_of_columns in table.image.summary.sub_categories
+                    and TableType.max_row_span in table.image.summary.sub_categories
+                    and TableType.max_col_span in table.image.summary.sub_categories
+                ):
+                    table.image.summary.remove_sub_category(TableType.number_of_rows)
+                    table.image.summary.remove_sub_category(TableType.number_of_columns)
+                    table.image.summary.remove_sub_category(TableType.max_row_span)
+                    table.image.summary.remove_sub_category(TableType.max_col_span)
+                else:
+                    raise AnnotationError(
+                        "Table summary does not contain sub categories TableType.number_of_rows, "
+                        "TableType.number_of_columns, TableType.max_row_span, TableType.max_col_span"
+                    )

             self.dp_manager.set_summary_annotation(
                 TableType.number_of_rows, TableType.number_of_rows, number_of_rows, annotation_id=table.annotation_id
@@ -492,7 +494,7 @@
         self.dp_manager.set_container_annotation(TableType.html, -1, TableType.html, table.annotation_id, html)

     def clone(self) -> PipelineComponent:
-        return self.__class__()
+        return self.__class__(self.table_name, self.cell_names)

     def get_meta_annotation(self) -> JsonDict:
         return dict(
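Because the refinement service no longer hard-codes its category lists, callers must now pass them in. A construction equivalent to the removed defaults (the old `_table_name`/`_cell_names` values above) might look like this sketch:

from deepdoctection.pipe.refine import TableSegmentationRefinementService
from deepdoctection.utils.settings import CellType, LayoutType

refine = TableSegmentationRefinementService(
    table_name=[LayoutType.table, LayoutType.table_rotated],
    cell_names=[
        LayoutType.cell,
        CellType.column_header,
        CellType.projected_row_header,
        CellType.spanning,
        CellType.row_header,
    ],
)
# clone() now preserves this configuration instead of returning a default-constructed instance.
refine_copy = refine.clone()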
--- a/deepdoctection/pipe/segment.py
+++ b/deepdoctection/pipe/segment.py
@@ -33,6 +33,7 @@ from ..extern.base import DetectionResult
 from ..mapper.maputils import MappingContextManager
 from ..mapper.match import match_anns_by_intersection
 from ..utils.detection_types import JsonDict
+from ..utils.error import ImageError
 from ..utils.settings import CellType, LayoutType, ObjectTypes, Relationships, TableType
 from .base import PipelineComponent
 from .refine import generate_html_string
@@ -136,12 +137,12 @@ def stretch_item_per_table(

     rows = dp.get_annotation(category_names=row_name, annotation_ids=item_ann_ids)
     if table.image is None:
-        raise ValueError("table.image cannot be None")
+        raise ImageError("table.image cannot be None")
     table_embedding_box = table.get_bounding_box(dp.image_id)

     for row in rows:
         if row.image is None:
-            raise ValueError("row.image cannot be None")
+            raise ImageError("row.image cannot be None")
         row_embedding_box = row.get_bounding_box(dp.image_id)
         row_embedding_box.ulx = table_embedding_box.ulx + 1.0
         row_embedding_box.lrx = table_embedding_box.lrx - 1.0
@@ -166,7 +167,7 @@

     for col in cols:
         if col.image is None:
-            raise ValueError("row.image cannot be None")
+            raise ImageError("row.image cannot be None")
         col_embedding_box = col.get_bounding_box(dp.image_id)
         col_embedding_box.uly = table_embedding_box.uly + 1.0
         col_embedding_box.lry = table_embedding_box.lry - 1.0
@@ -194,7 +195,7 @@ def _tile_by_stretching_rows_left_and_rightwise(
     dp: Image, items: List[ImageAnnotation], table: ImageAnnotation, item_name: str
 ) -> None:
     if table.image is None:
-        raise ValueError("table.image cannot be None")
+        raise ImageError("table.image cannot be None")
     table_embedding_box = table.get_bounding_box(dp.image_id)

     tmp_item_xy = table_embedding_box.uly + 1.0 if item_name == LayoutType.row else table_embedding_box.ulx + 1.0
@@ -206,7 +207,7 @@
         image_annotation={"category_name": item.category_name, "annotation_id": item.annotation_id},
     ):
         if item.image is None:
-            raise ValueError("item.image cannot be None")
+            raise ImageError("item.image cannot be None")
         item_embedding_box = item.get_bounding_box(dp.image_id)
         if idx != len(items) - 1:
             next_item_embedding_box = items[idx + 1].get_bounding_box(dp.image_id)
@@ -258,7 +259,7 @@ def _tile_by_stretching_rows_leftwise_column_downwise(
     dp: Image, items: List[ImageAnnotation], table: ImageAnnotation, item_name: str
 ) -> None:
     if table.image is None:
-        raise ValueError("table.image cannot be None")
+        raise ImageError("table.image cannot be None")
     table_embedding_box = table.get_bounding_box(dp.image_id)

     tmp_item_xy = table_embedding_box.uly + 1.0 if item_name == LayoutType.row else table_embedding_box.ulx + 1.0
@@ -270,7 +271,7 @@
         image_annotation={"category_name": item.category_name, "annotation_id": item.annotation_id},
     ):
         if item.image is None:
-            raise ValueError("item.image cannot be None")
+            raise ImageError("item.image cannot be None")
         item_embedding_box = item.get_bounding_box(dp.image_id)
         new_embedding_box = BoundingBox(
             ulx=item_embedding_box.ulx if item_name == LayoutType.row else tmp_item_xy,
@@ -339,9 +340,9 @@ def tile_tables_with_items_per_table(
     items = dp.get_annotation(category_names=item_name, annotation_ids=item_ann_ids)

     items.sort(
-        key=lambda x: x.get_bounding_box(dp.image_id).cx
-        if item_name == LayoutType.column
-        else x.get_bounding_box(dp.image_id).cy
+        key=lambda x: (
+            x.get_bounding_box(dp.image_id).cx if item_name == LayoutType.column else x.get_bounding_box(dp.image_id).cy
+        )
     )

     if stretch_rule == "left":
@@ -737,9 +738,11 @@ class TableSegmentationService(PipelineComponent):

         # we will assume that either all or no image attribute has been generated
         items.sort(
-            key=lambda x: x.get_bounding_box(dp.image_id).cx  # pylint: disable=W0640
-            if item_name == LayoutType.column  # pylint: disable=W0640
-            else x.get_bounding_box(dp.image_id).cy  # pylint: disable=W0640
+            key=lambda x: (
+                x.get_bounding_box(dp.image_id).cx  # pylint: disable=W0640
+                if item_name == LayoutType.column  # pylint: disable=W0640
+                else x.get_bounding_box(dp.image_id).cy  # pylint: disable=W0640
+            )
         )

         for item_number, item in enumerate(items, 1):
@@ -939,9 +942,11 @@ class PubtablesSegmentationService(PipelineComponent):

         # we will assume that either all or no image attribute has been generated
         items.sort(
-            key=lambda x: x.get_bounding_box(dp.image_id).cx
-            if item_name == LayoutType.column  # pylint: disable=W0640
-            else x.get_bounding_box(dp.image_id).cy
+            key=lambda x: (
+                x.get_bounding_box(dp.image_id).cx
+                if item_name == LayoutType.column  # pylint: disable=W0640
+                else x.get_bounding_box(dp.image_id).cy
+            )
         )

         for item_number, item in enumerate(items, 1):
--- a/deepdoctection/pipe/cell.py
+++ b/deepdoctection/pipe/sub_layout.py
@@ -1,5 +1,5 @@
 # -*- coding: utf-8 -*-
-# File: cell.py
+# File: sub_layout.py

 # Copyright 2021 Dr. Janis Meyer. All rights reserved.
 #
@@ -24,9 +24,11 @@ from typing import Dict, List, Mapping, Optional, Sequence, Union

 import numpy as np

+from ..datapoint.annotation import ImageAnnotation
+from ..datapoint.box import crop_box_from_image
 from ..datapoint.image import Image
 from ..extern.base import DetectionResult, ObjectDetector, PdfMiner
-from ..utils.detection_types import JsonDict
+from ..utils.detection_types import ImageType, JsonDict
 from ..utils.settings import ObjectTypes, Relationships
 from ..utils.transform import PadTransform
 from .base import PredictorPipelineComponent
@@ -181,18 +183,14 @@ class SubImageLayoutService(PredictorPipelineComponent):
         """
         sub_image_anns = dp.get_annotation_iter(category_names=self.sub_image_name)
         for sub_image_ann in sub_image_anns:
-            if sub_image_ann.image is None:
-                raise ValueError("sub_image_ann.image is None, but must be an image")
-            np_image = sub_image_ann.image.image
-            if self.padder:
-                np_image = self.padder.apply_image(np_image)
-            detect_result_list = self.predictor.predict(np_image)
+            np_image = self.prepare_np_image(sub_image_ann)
+            detect_result_list = self.predictor.predict(np_image)  # type: ignore
             if self.padder and detect_result_list:
                 boxes = np.array([detect_result.box for detect_result in detect_result_list])
                 boxes_orig = self.padder.inverse_apply_coords(boxes)
                 for idx, detect_result in enumerate(detect_result_list):
                     detect_result.box = boxes_orig[idx, :].tolist()
-            if self.detect_result_generator:
+            if self.detect_result_generator and sub_image_ann.image:
                 self.detect_result_generator.width = sub_image_ann.image.width
                 self.detect_result_generator.height = sub_image_ann.image.height
                 detect_result_list = self.detect_result_generator.create_detection_result(detect_result_list)
@@ -235,3 +233,26 @@
             deepcopy(self.detect_result_generator),
             padder_clone,
         )
+
+    def prepare_np_image(self, sub_image_ann: ImageAnnotation) -> ImageType:
+        """Maybe crop and pad a np_array before passing it to the predictor.
+
+        Note that we currently assume to a two level hierachy of images, e.g. we can crop a sub-image from the base
+        image, e.g. the original input but we cannot crop a sub-image from an image which is itself a sub-image.
+
+        :param sub_image_ann: ImageAnnotation to be processed
+        :return: processed np_image
+        """
+        if sub_image_ann.image is None:
+            raise ValueError("sub_image_ann.image is None, but must be an datapoint.Image")
+        np_image = sub_image_ann.image.image
+        if np_image is None and self.dp_manager.datapoint.image is not None:
+            np_image = crop_box_from_image(
+                self.dp_manager.datapoint.image,
+                sub_image_ann.get_bounding_box(self.dp_manager.datapoint.image_id),
+                self.dp_manager.datapoint.width,
+                self.dp_manager.datapoint.height,
+            )
+        if self.padder:
+            np_image = self.padder.apply_image(np_image)
+        return np_image
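The crop-then-pad flow in `prepare_np_image`, together with the box un-padding in `serve`, is a simple geometric round trip. The numpy sketch below shows only the geometry with a constant border; it is not the `PadTransform` API, whose interface may differ:

import numpy as np

pad_top, pad_left = 10, 20
image = np.zeros((100, 200, 3), dtype=np.uint8)

# "apply_image": add a constant border around the array before prediction.
padded = np.pad(image, ((pad_top, pad_top), (pad_left, pad_left), (0, 0)))

# A detector returns boxes in padded coordinates, here as (ulx, uly, lrx, lry).
boxes_padded = np.array([[25.0, 15.0, 80.0, 40.0]])

# "inverse_apply_coords": shift the boxes back into the original, un-padded frame.
boxes_orig = boxes_padded - np.array([pad_left, pad_top, pad_left, pad_top])
print(boxes_orig)  # [[ 5.  5. 60. 30.]]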
--- a/deepdoctection/pipe/text.py
+++ b/deepdoctection/pipe/text.py
@@ -26,6 +26,7 @@ from ..datapoint.image import Image
 from ..extern.base import ObjectDetector, PdfMiner, TextRecognizer
 from ..extern.tessocr import TesseractOcrDetector
 from ..utils.detection_types import ImageType, JsonDict
+from ..utils.error import ImageError
 from ..utils.settings import PageType, TypeOrStr, WordType, get_type
 from .base import PredictorPipelineComponent
 from .registry import pipeline_component_registry
@@ -89,7 +90,10 @@ class TextExtractionService(PredictorPipelineComponent):
         super().__init__(self._get_name(text_extract_detector.name), text_extract_detector)
         if self.extract_from_category:
             if not isinstance(self.predictor, (ObjectDetector, TextRecognizer)):
-                raise TypeError("Predicting from a cropped image requires to pass an ObjectDetector or TextRecognizer.")
+                raise TypeError(
+                    f"Predicting from a cropped image requires to pass an ObjectDetector or "
+                    f"TextRecognizer. Got {type(self.predictor)}"
+                )
         if run_time_ocr_language_selection:
             assert isinstance(
                 self.predictor, TesseractOcrDetector
@@ -171,13 +175,13 @@

         if isinstance(text_roi, ImageAnnotation):
             if text_roi.image is None:
-                raise ValueError("text_roi.image cannot be None")
+                raise ImageError("text_roi.image cannot be None")
             if text_roi.image.image is None:
-                raise ValueError("text_roi.image.image cannot be None")
+                raise ImageError("text_roi.image.image cannot be None")
             return text_roi.image.image
         if isinstance(self.predictor, ObjectDetector):
             if not isinstance(text_roi, Image):
-                raise ValueError("text_roi must be an image")
+                raise ImageError("text_roi must be an image")
             return text_roi.image
         if isinstance(text_roi, list):
             assert all(roi.image is not None for roi in text_roi)
@@ -201,9 +205,11 @@
             [
                 (
                     "image_annotations",
-                    self.predictor.possible_categories()
-                    if isinstance(self.predictor, (ObjectDetector, PdfMiner))
-                    else [],
+                    (
+                        self.predictor.possible_categories()
+                        if isinstance(self.predictor, (ObjectDetector, PdfMiner))
+                        else []
+                    ),
                 ),
                 ("sub_categories", sub_cat_dict),
                 ("relationships", {}),
@@ -218,5 +224,5 @@
     def clone(self) -> "PredictorPipelineComponent":
         predictor = self.predictor.clone()
         if not isinstance(predictor, (ObjectDetector, PdfMiner, TextRecognizer)):
-            raise ValueError(f"predictor must be of type ObjectDetector or PdfMiner, but is of type {type(predictor)}")
+            raise ImageError(f"predictor must be of type ObjectDetector or PdfMiner, but is of type {type(predictor)}")
         return self.__class__(predictor, deepcopy(self.extract_from_category), self.run_time_ocr_language_selection)
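For context, the constructor signature can be read off the `clone()` call above: `(text_extract_detector, extract_from_category, run_time_ocr_language_selection)`. A hedged usage sketch, where the Tesseract config path is hypothetical and must be adjusted to your installation:

from deepdoctection.extern.tessocr import TesseractOcrDetector
from deepdoctection.pipe.text import TextExtractionService

ocr = TesseractOcrDetector("conf_tesseract.yaml")  # hypothetical path

# OCR the full page. Passing a category name for extract_from_category would OCR cropped
# sub-images instead, which is what the stricter TypeError above guards against.
text = TextExtractionService(ocr, extract_from_category=None, run_time_ocr_language_selection=False)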
--- a/deepdoctection/pipe/transform.py
+++ b/deepdoctection/pipe/transform.py
@@ -23,7 +23,6 @@ on images (e.g. deskew, de-noising or more general GAN like operations.
 from ..datapoint.image import Image
 from ..extern.base import ImageTransformer
 from ..utils.detection_types import JsonDict
-from ..utils.logger import LoggingRecord, logger
 from .base import ImageTransformPipelineComponent
 from .registry import pipeline_component_registry

@@ -49,16 +48,24 @@ class SimpleTransformService(ImageTransformPipelineComponent):

     def serve(self, dp: Image) -> None:
         if dp.annotations:
-            logger.warning(
-                LoggingRecord(
-                    f"{self.name} has already received image with image annotations. These annotations "
-                    f"will not be transformed and might cause unexpected output in your pipeline."
-                )
+            raise RuntimeError(
+                "SimpleTransformService receives datapoints with ÌmageAnnotations. This violates the "
+                "pipeline building API but this can currently be catched only at runtime. "
+                "Please make sure that this component is the first one in the pipeline."
             )
+
         if dp.image is not None:
-            np_image_transform = self.transform_predictor.transform(dp.image)
+            detection_result = self.transform_predictor.predict(dp.image)
+            transformed_image = self.transform_predictor.transform(dp.image, detection_result)
             self.dp_manager.datapoint.clear_image(True)
-            self.dp_manager.datapoint.image = np_image_transform
+            self.dp_manager.datapoint.image = transformed_image
+            self.dp_manager.set_summary_annotation(
+                summary_key=self.transform_predictor.possible_category(),
+                summary_name=self.transform_predictor.possible_category(),
+                summary_number=None,
+                summary_value=getattr(detection_result, self.transform_predictor.possible_category().value, None),
+                summary_score=detection_result.score,
+            )

     def clone(self) -> "SimpleTransformService":
         return self.__class__(self.transform_predictor)
@@ -69,7 +76,7 @@ class SimpleTransformService(ImageTransformPipelineComponent):
                 ("image_annotations", []),
                 ("sub_categories", {}),
                 ("relationships", {}),
-                ("summaries", []),
+                ("summaries", [self.transform_predictor.possible_category()]),
             ]
         )

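The reworked `serve` now both transforms the image and records the predictor's `DetectionResult` (for deskewing, a rotation angle) as a page summary, which `get_meta_annotation` advertises via `possible_category()`. An assumed usage sketch follows; the deskew predictor name is inferred from the `extern/deskew.py` changes in this release and may differ:

from deepdoctection.extern.deskew import Jdeskewer
from deepdoctection.pipe.transform import SimpleTransformService

# Must be the first component of a pipeline: serve() raises RuntimeError if the
# datapoint already carries ImageAnnotations. The predicted angle is stored as a
# summary annotation under the predictor's possible_category().
deskew = SimpleTransformService(Jdeskewer())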
--- a/deepdoctection/train/__init__.py
+++ b/deepdoctection/train/__init__.py
@@ -19,20 +19,14 @@
 Init module for train package
 """

-from ..utils.file_utils import (
-    detectron2_available,
-    pytorch_available,
-    tensorpack_available,
-    tf_available,
-    transformers_available,
-)
+from ..utils.file_utils import detectron2_available, tensorpack_available, transformers_available

-if tf_available() and tensorpack_available():
-    from .tp_frcnn_train import train_faster_rcnn
-
-if pytorch_available() and detectron2_available():
+if detectron2_available():
     from .d2_frcnn_train import train_d2_faster_rcnn

-if pytorch_available() and transformers_available():
+if transformers_available():
     from .hf_detr_train import train_hf_detr
     from .hf_layoutlm_train import train_hf_layoutlm
+
+if tensorpack_available():
+    from .tp_frcnn_train import train_faster_rcnn
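The import guards now key off the framework-extension checks alone; the separate `pytorch_available()`/`tf_available()` checks are no longer consulted here (compare the `utils/file_utils.py` and `utils/env_info.py` entries in the file list). Downstream code can mirror the same pattern, as in this minimal sketch:

from deepdoctection.utils.file_utils import detectron2_available, transformers_available

if detectron2_available():
    from deepdoctection.train import train_d2_faster_rcnn  # needs Detectron2 installed

if transformers_available():
    from deepdoctection.train import train_hf_layoutlm  # needs HF Transformers installed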