deepdoctection 0.39.7__tar.gz → 0.40.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {deepdoctection-0.39.7 → deepdoctection-0.40.0}/PKG-INFO +1 -1
- {deepdoctection-0.39.7 → deepdoctection-0.40.0}/deepdoctection/__init__.py +2 -1
- {deepdoctection-0.39.7 → deepdoctection-0.40.0}/deepdoctection/analyzer/_config.py +0 -1
- {deepdoctection-0.39.7 → deepdoctection-0.40.0}/deepdoctection/analyzer/factory.py +34 -13
- {deepdoctection-0.39.7 → deepdoctection-0.40.0}/deepdoctection/datapoint/image.py +5 -5
- {deepdoctection-0.39.7 → deepdoctection-0.40.0}/deepdoctection/datapoint/view.py +5 -5
- {deepdoctection-0.39.7 → deepdoctection-0.40.0}/deepdoctection/mapper/match.py +28 -8
- {deepdoctection-0.39.7 → deepdoctection-0.40.0}/deepdoctection/pipe/anngen.py +1 -25
- {deepdoctection-0.39.7 → deepdoctection-0.40.0}/deepdoctection/pipe/common.py +91 -38
- {deepdoctection-0.39.7 → deepdoctection-0.40.0}/deepdoctection/pipe/layout.py +26 -13
- {deepdoctection-0.39.7 → deepdoctection-0.40.0}/deepdoctection/pipe/order.py +6 -22
- {deepdoctection-0.39.7 → deepdoctection-0.40.0}/deepdoctection/pipe/segment.py +36 -43
- {deepdoctection-0.39.7 → deepdoctection-0.40.0}/deepdoctection/pipe/sub_layout.py +1 -10
- {deepdoctection-0.39.7 → deepdoctection-0.40.0}/deepdoctection/pipe/text.py +5 -14
- {deepdoctection-0.39.7 → deepdoctection-0.40.0}/deepdoctection/train/hf_detr_train.py +1 -0
- {deepdoctection-0.39.7 → deepdoctection-0.40.0}/deepdoctection.egg-info/PKG-INFO +1 -1
- {deepdoctection-0.39.7 → deepdoctection-0.40.0}/LICENSE +0 -0
- {deepdoctection-0.39.7 → deepdoctection-0.40.0}/README.md +0 -0
- {deepdoctection-0.39.7 → deepdoctection-0.40.0}/deepdoctection/analyzer/__init__.py +0 -0
- {deepdoctection-0.39.7 → deepdoctection-0.40.0}/deepdoctection/analyzer/dd.py +0 -0
- {deepdoctection-0.39.7 → deepdoctection-0.40.0}/deepdoctection/configs/__init__.py +0 -0
- {deepdoctection-0.39.7 → deepdoctection-0.40.0}/deepdoctection/configs/conf_dd_one.yaml +0 -0
- {deepdoctection-0.39.7 → deepdoctection-0.40.0}/deepdoctection/configs/conf_tesseract.yaml +0 -0
- {deepdoctection-0.39.7 → deepdoctection-0.40.0}/deepdoctection/dataflow/__init__.py +0 -0
- {deepdoctection-0.39.7 → deepdoctection-0.40.0}/deepdoctection/dataflow/base.py +0 -0
- {deepdoctection-0.39.7 → deepdoctection-0.40.0}/deepdoctection/dataflow/common.py +0 -0
- {deepdoctection-0.39.7 → deepdoctection-0.40.0}/deepdoctection/dataflow/custom.py +0 -0
- {deepdoctection-0.39.7 → deepdoctection-0.40.0}/deepdoctection/dataflow/custom_serialize.py +0 -0
- {deepdoctection-0.39.7 → deepdoctection-0.40.0}/deepdoctection/dataflow/parallel_map.py +0 -0
- {deepdoctection-0.39.7 → deepdoctection-0.40.0}/deepdoctection/dataflow/serialize.py +0 -0
- {deepdoctection-0.39.7 → deepdoctection-0.40.0}/deepdoctection/dataflow/stats.py +0 -0
- {deepdoctection-0.39.7 → deepdoctection-0.40.0}/deepdoctection/datapoint/__init__.py +0 -0
- {deepdoctection-0.39.7 → deepdoctection-0.40.0}/deepdoctection/datapoint/annotation.py +0 -0
- {deepdoctection-0.39.7 → deepdoctection-0.40.0}/deepdoctection/datapoint/box.py +0 -0
- {deepdoctection-0.39.7 → deepdoctection-0.40.0}/deepdoctection/datapoint/convert.py +0 -0
- {deepdoctection-0.39.7 → deepdoctection-0.40.0}/deepdoctection/datasets/__init__.py +0 -0
- {deepdoctection-0.39.7 → deepdoctection-0.40.0}/deepdoctection/datasets/adapter.py +0 -0
- {deepdoctection-0.39.7 → deepdoctection-0.40.0}/deepdoctection/datasets/base.py +0 -0
- {deepdoctection-0.39.7 → deepdoctection-0.40.0}/deepdoctection/datasets/dataflow_builder.py +0 -0
- {deepdoctection-0.39.7 → deepdoctection-0.40.0}/deepdoctection/datasets/info.py +0 -0
- {deepdoctection-0.39.7 → deepdoctection-0.40.0}/deepdoctection/datasets/instances/__init__.py +0 -0
- {deepdoctection-0.39.7 → deepdoctection-0.40.0}/deepdoctection/datasets/instances/doclaynet.py +0 -0
- {deepdoctection-0.39.7 → deepdoctection-0.40.0}/deepdoctection/datasets/instances/fintabnet.py +0 -0
- {deepdoctection-0.39.7 → deepdoctection-0.40.0}/deepdoctection/datasets/instances/funsd.py +0 -0
- {deepdoctection-0.39.7 → deepdoctection-0.40.0}/deepdoctection/datasets/instances/iiitar13k.py +0 -0
- {deepdoctection-0.39.7 → deepdoctection-0.40.0}/deepdoctection/datasets/instances/layouttest.py +0 -0
- {deepdoctection-0.39.7 → deepdoctection-0.40.0}/deepdoctection/datasets/instances/publaynet.py +0 -0
- {deepdoctection-0.39.7 → deepdoctection-0.40.0}/deepdoctection/datasets/instances/pubtables1m.py +0 -0
- {deepdoctection-0.39.7 → deepdoctection-0.40.0}/deepdoctection/datasets/instances/pubtabnet.py +0 -0
- {deepdoctection-0.39.7 → deepdoctection-0.40.0}/deepdoctection/datasets/instances/rvlcdip.py +0 -0
- {deepdoctection-0.39.7 → deepdoctection-0.40.0}/deepdoctection/datasets/instances/xfund.py +0 -0
- {deepdoctection-0.39.7 → deepdoctection-0.40.0}/deepdoctection/datasets/instances/xsl/__init__.py +0 -0
- {deepdoctection-0.39.7 → deepdoctection-0.40.0}/deepdoctection/datasets/instances/xsl/pascal_voc.xsl +0 -0
- {deepdoctection-0.39.7 → deepdoctection-0.40.0}/deepdoctection/datasets/registry.py +0 -0
- {deepdoctection-0.39.7 → deepdoctection-0.40.0}/deepdoctection/datasets/save.py +0 -0
- {deepdoctection-0.39.7 → deepdoctection-0.40.0}/deepdoctection/eval/__init__.py +0 -0
- {deepdoctection-0.39.7 → deepdoctection-0.40.0}/deepdoctection/eval/accmetric.py +0 -0
- {deepdoctection-0.39.7 → deepdoctection-0.40.0}/deepdoctection/eval/base.py +0 -0
- {deepdoctection-0.39.7 → deepdoctection-0.40.0}/deepdoctection/eval/cocometric.py +0 -0
- {deepdoctection-0.39.7 → deepdoctection-0.40.0}/deepdoctection/eval/eval.py +0 -0
- {deepdoctection-0.39.7 → deepdoctection-0.40.0}/deepdoctection/eval/registry.py +0 -0
- {deepdoctection-0.39.7 → deepdoctection-0.40.0}/deepdoctection/eval/tedsmetric.py +0 -0
- {deepdoctection-0.39.7 → deepdoctection-0.40.0}/deepdoctection/eval/tp_eval_callback.py +0 -0
- {deepdoctection-0.39.7 → deepdoctection-0.40.0}/deepdoctection/extern/__init__.py +0 -0
- {deepdoctection-0.39.7 → deepdoctection-0.40.0}/deepdoctection/extern/base.py +0 -0
- {deepdoctection-0.39.7 → deepdoctection-0.40.0}/deepdoctection/extern/d2detect.py +0 -0
- {deepdoctection-0.39.7 → deepdoctection-0.40.0}/deepdoctection/extern/deskew.py +0 -0
- {deepdoctection-0.39.7 → deepdoctection-0.40.0}/deepdoctection/extern/doctrocr.py +0 -0
- {deepdoctection-0.39.7 → deepdoctection-0.40.0}/deepdoctection/extern/fastlang.py +0 -0
- {deepdoctection-0.39.7 → deepdoctection-0.40.0}/deepdoctection/extern/hfdetr.py +0 -0
- {deepdoctection-0.39.7 → deepdoctection-0.40.0}/deepdoctection/extern/hflayoutlm.py +0 -0
- {deepdoctection-0.39.7 → deepdoctection-0.40.0}/deepdoctection/extern/hflm.py +0 -0
- {deepdoctection-0.39.7 → deepdoctection-0.40.0}/deepdoctection/extern/model.py +0 -0
- {deepdoctection-0.39.7 → deepdoctection-0.40.0}/deepdoctection/extern/pdftext.py +0 -0
- {deepdoctection-0.39.7 → deepdoctection-0.40.0}/deepdoctection/extern/pt/__init__.py +0 -0
- {deepdoctection-0.39.7 → deepdoctection-0.40.0}/deepdoctection/extern/pt/nms.py +0 -0
- {deepdoctection-0.39.7 → deepdoctection-0.40.0}/deepdoctection/extern/pt/ptutils.py +0 -0
- {deepdoctection-0.39.7 → deepdoctection-0.40.0}/deepdoctection/extern/tessocr.py +0 -0
- {deepdoctection-0.39.7 → deepdoctection-0.40.0}/deepdoctection/extern/texocr.py +0 -0
- {deepdoctection-0.39.7 → deepdoctection-0.40.0}/deepdoctection/extern/tp/__init__.py +0 -0
- {deepdoctection-0.39.7 → deepdoctection-0.40.0}/deepdoctection/extern/tp/tfutils.py +0 -0
- {deepdoctection-0.39.7 → deepdoctection-0.40.0}/deepdoctection/extern/tp/tpcompat.py +0 -0
- {deepdoctection-0.39.7 → deepdoctection-0.40.0}/deepdoctection/extern/tp/tpfrcnn/__init__.py +0 -0
- {deepdoctection-0.39.7 → deepdoctection-0.40.0}/deepdoctection/extern/tp/tpfrcnn/common.py +0 -0
- {deepdoctection-0.39.7 → deepdoctection-0.40.0}/deepdoctection/extern/tp/tpfrcnn/config/__init__.py +0 -0
- {deepdoctection-0.39.7 → deepdoctection-0.40.0}/deepdoctection/extern/tp/tpfrcnn/config/config.py +0 -0
- {deepdoctection-0.39.7 → deepdoctection-0.40.0}/deepdoctection/extern/tp/tpfrcnn/modeling/__init__.py +0 -0
- {deepdoctection-0.39.7 → deepdoctection-0.40.0}/deepdoctection/extern/tp/tpfrcnn/modeling/backbone.py +0 -0
- {deepdoctection-0.39.7 → deepdoctection-0.40.0}/deepdoctection/extern/tp/tpfrcnn/modeling/generalized_rcnn.py +0 -0
- {deepdoctection-0.39.7 → deepdoctection-0.40.0}/deepdoctection/extern/tp/tpfrcnn/modeling/model_box.py +0 -0
- {deepdoctection-0.39.7 → deepdoctection-0.40.0}/deepdoctection/extern/tp/tpfrcnn/modeling/model_cascade.py +0 -0
- {deepdoctection-0.39.7 → deepdoctection-0.40.0}/deepdoctection/extern/tp/tpfrcnn/modeling/model_fpn.py +0 -0
- {deepdoctection-0.39.7 → deepdoctection-0.40.0}/deepdoctection/extern/tp/tpfrcnn/modeling/model_frcnn.py +0 -0
- {deepdoctection-0.39.7 → deepdoctection-0.40.0}/deepdoctection/extern/tp/tpfrcnn/modeling/model_mrcnn.py +0 -0
- {deepdoctection-0.39.7 → deepdoctection-0.40.0}/deepdoctection/extern/tp/tpfrcnn/modeling/model_rpn.py +0 -0
- {deepdoctection-0.39.7 → deepdoctection-0.40.0}/deepdoctection/extern/tp/tpfrcnn/predict.py +0 -0
- {deepdoctection-0.39.7 → deepdoctection-0.40.0}/deepdoctection/extern/tp/tpfrcnn/preproc.py +0 -0
- {deepdoctection-0.39.7 → deepdoctection-0.40.0}/deepdoctection/extern/tp/tpfrcnn/utils/__init__.py +0 -0
- {deepdoctection-0.39.7 → deepdoctection-0.40.0}/deepdoctection/extern/tp/tpfrcnn/utils/box_ops.py +0 -0
- {deepdoctection-0.39.7 → deepdoctection-0.40.0}/deepdoctection/extern/tp/tpfrcnn/utils/np_box_ops.py +0 -0
- {deepdoctection-0.39.7 → deepdoctection-0.40.0}/deepdoctection/extern/tpdetect.py +0 -0
- {deepdoctection-0.39.7 → deepdoctection-0.40.0}/deepdoctection/mapper/__init__.py +0 -0
- {deepdoctection-0.39.7 → deepdoctection-0.40.0}/deepdoctection/mapper/cats.py +0 -0
- {deepdoctection-0.39.7 → deepdoctection-0.40.0}/deepdoctection/mapper/cocostruct.py +0 -0
- {deepdoctection-0.39.7 → deepdoctection-0.40.0}/deepdoctection/mapper/d2struct.py +0 -0
- {deepdoctection-0.39.7 → deepdoctection-0.40.0}/deepdoctection/mapper/hfstruct.py +0 -0
- {deepdoctection-0.39.7 → deepdoctection-0.40.0}/deepdoctection/mapper/laylmstruct.py +0 -0
- {deepdoctection-0.39.7 → deepdoctection-0.40.0}/deepdoctection/mapper/maputils.py +0 -0
- {deepdoctection-0.39.7 → deepdoctection-0.40.0}/deepdoctection/mapper/misc.py +0 -0
- {deepdoctection-0.39.7 → deepdoctection-0.40.0}/deepdoctection/mapper/pascalstruct.py +0 -0
- {deepdoctection-0.39.7 → deepdoctection-0.40.0}/deepdoctection/mapper/prodigystruct.py +0 -0
- {deepdoctection-0.39.7 → deepdoctection-0.40.0}/deepdoctection/mapper/pubstruct.py +0 -0
- {deepdoctection-0.39.7 → deepdoctection-0.40.0}/deepdoctection/mapper/tpstruct.py +0 -0
- {deepdoctection-0.39.7 → deepdoctection-0.40.0}/deepdoctection/mapper/xfundstruct.py +0 -0
- {deepdoctection-0.39.7 → deepdoctection-0.40.0}/deepdoctection/pipe/__init__.py +0 -0
- {deepdoctection-0.39.7 → deepdoctection-0.40.0}/deepdoctection/pipe/base.py +0 -0
- {deepdoctection-0.39.7 → deepdoctection-0.40.0}/deepdoctection/pipe/concurrency.py +0 -0
- {deepdoctection-0.39.7 → deepdoctection-0.40.0}/deepdoctection/pipe/doctectionpipe.py +0 -0
- {deepdoctection-0.39.7 → deepdoctection-0.40.0}/deepdoctection/pipe/language.py +0 -0
- {deepdoctection-0.39.7 → deepdoctection-0.40.0}/deepdoctection/pipe/lm.py +0 -0
- {deepdoctection-0.39.7 → deepdoctection-0.40.0}/deepdoctection/pipe/refine.py +0 -0
- {deepdoctection-0.39.7 → deepdoctection-0.40.0}/deepdoctection/pipe/registry.py +0 -0
- {deepdoctection-0.39.7 → deepdoctection-0.40.0}/deepdoctection/pipe/transform.py +0 -0
- {deepdoctection-0.39.7 → deepdoctection-0.40.0}/deepdoctection/py.typed +0 -0
- {deepdoctection-0.39.7 → deepdoctection-0.40.0}/deepdoctection/train/__init__.py +0 -0
- {deepdoctection-0.39.7 → deepdoctection-0.40.0}/deepdoctection/train/d2_frcnn_train.py +0 -0
- {deepdoctection-0.39.7 → deepdoctection-0.40.0}/deepdoctection/train/hf_layoutlm_train.py +0 -0
- {deepdoctection-0.39.7 → deepdoctection-0.40.0}/deepdoctection/train/tp_frcnn_train.py +0 -0
- {deepdoctection-0.39.7 → deepdoctection-0.40.0}/deepdoctection/utils/__init__.py +0 -0
- {deepdoctection-0.39.7 → deepdoctection-0.40.0}/deepdoctection/utils/concurrency.py +0 -0
- {deepdoctection-0.39.7 → deepdoctection-0.40.0}/deepdoctection/utils/context.py +0 -0
- {deepdoctection-0.39.7 → deepdoctection-0.40.0}/deepdoctection/utils/develop.py +0 -0
- {deepdoctection-0.39.7 → deepdoctection-0.40.0}/deepdoctection/utils/env_info.py +0 -0
- {deepdoctection-0.39.7 → deepdoctection-0.40.0}/deepdoctection/utils/error.py +0 -0
- {deepdoctection-0.39.7 → deepdoctection-0.40.0}/deepdoctection/utils/file_utils.py +0 -0
- {deepdoctection-0.39.7 → deepdoctection-0.40.0}/deepdoctection/utils/fs.py +0 -0
- {deepdoctection-0.39.7 → deepdoctection-0.40.0}/deepdoctection/utils/identifier.py +0 -0
- {deepdoctection-0.39.7 → deepdoctection-0.40.0}/deepdoctection/utils/logger.py +0 -0
- {deepdoctection-0.39.7 → deepdoctection-0.40.0}/deepdoctection/utils/metacfg.py +0 -0
- {deepdoctection-0.39.7 → deepdoctection-0.40.0}/deepdoctection/utils/mocks.py +0 -0
- {deepdoctection-0.39.7 → deepdoctection-0.40.0}/deepdoctection/utils/pdf_utils.py +0 -0
- {deepdoctection-0.39.7 → deepdoctection-0.40.0}/deepdoctection/utils/settings.py +0 -0
- {deepdoctection-0.39.7 → deepdoctection-0.40.0}/deepdoctection/utils/tqdm.py +0 -0
- {deepdoctection-0.39.7 → deepdoctection-0.40.0}/deepdoctection/utils/transform.py +0 -0
- {deepdoctection-0.39.7 → deepdoctection-0.40.0}/deepdoctection/utils/types.py +0 -0
- {deepdoctection-0.39.7 → deepdoctection-0.40.0}/deepdoctection/utils/utils.py +0 -0
- {deepdoctection-0.39.7 → deepdoctection-0.40.0}/deepdoctection/utils/viz.py +0 -0
- {deepdoctection-0.39.7 → deepdoctection-0.40.0}/deepdoctection.egg-info/SOURCES.txt +0 -0
- {deepdoctection-0.39.7 → deepdoctection-0.40.0}/deepdoctection.egg-info/dependency_links.txt +0 -0
- {deepdoctection-0.39.7 → deepdoctection-0.40.0}/deepdoctection.egg-info/requires.txt +0 -0
- {deepdoctection-0.39.7 → deepdoctection-0.40.0}/deepdoctection.egg-info/top_level.txt +0 -0
- {deepdoctection-0.39.7 → deepdoctection-0.40.0}/setup.cfg +0 -0
- {deepdoctection-0.39.7 → deepdoctection-0.40.0}/setup.py +0 -0
- {deepdoctection-0.39.7 → deepdoctection-0.40.0}/tests/test_utils.py +0 -0
|
@@ -25,7 +25,7 @@ from .utils.logger import LoggingRecord, logger
|
|
|
25
25
|
|
|
26
26
|
# pylint: enable=wrong-import-position
|
|
27
27
|
|
|
28
|
-
__version__ = "0.39.7"
|
|
28
|
+
__version__ = "0.40.0"
|
|
29
29
|
|
|
30
30
|
_IMPORT_STRUCTURE = {
|
|
31
31
|
"analyzer": ["config_sanity_checks", "get_dd_analyzer", "ServiceFactory"],
|
|
@@ -260,6 +260,7 @@ _IMPORT_STRUCTURE = {
|
|
|
260
260
|
"ImageCroppingService",
|
|
261
261
|
"IntersectionMatcher",
|
|
262
262
|
"NeighbourMatcher",
|
|
263
|
+
"FamilyCompound",
|
|
263
264
|
"MatchingService",
|
|
264
265
|
"PageParsingService",
|
|
265
266
|
"AnnotationNmsService",
|
|
@@ -72,7 +72,6 @@ cfg.SEGMENTATION.THRESHOLD_COLS = 0.4
|
|
|
72
72
|
cfg.SEGMENTATION.FULL_TABLE_TILING = True
|
|
73
73
|
cfg.SEGMENTATION.REMOVE_IOU_THRESHOLD_ROWS = 0.001
|
|
74
74
|
cfg.SEGMENTATION.REMOVE_IOU_THRESHOLD_COLS = 0.001
|
|
75
|
-
cfg.SEGMENTATION.CELL_CATEGORY_ID = 12
|
|
76
75
|
cfg.SEGMENTATION.TABLE_NAME = LayoutType.TABLE
|
|
77
76
|
cfg.SEGMENTATION.PUBTABLES_CELL_NAMES = [
|
|
78
77
|
CellType.SPANNING,
|
|
@@ -35,13 +35,14 @@ from ..extern.tpdetect import TPFrcnnDetector
|
|
|
35
35
|
from ..pipe.base import PipelineComponent
|
|
36
36
|
from ..pipe.common import (
|
|
37
37
|
AnnotationNmsService,
|
|
38
|
+
FamilyCompound,
|
|
38
39
|
IntersectionMatcher,
|
|
39
40
|
MatchingService,
|
|
40
41
|
NeighbourMatcher,
|
|
41
42
|
PageParsingService,
|
|
42
43
|
)
|
|
43
44
|
from ..pipe.doctectionpipe import DoctectionPipe
|
|
44
|
-
from ..pipe.layout import ImageLayoutService
|
|
45
|
+
from ..pipe.layout import ImageLayoutService, skip_if_category_or_service_extracted
|
|
45
46
|
from ..pipe.order import TextOrderService
|
|
46
47
|
from ..pipe.refine import TableSegmentationRefinementService
|
|
47
48
|
from ..pipe.segment import PubtablesSegmentationService, TableSegmentationService
|
|
@@ -284,7 +285,6 @@ class ServiceFactory:
|
|
|
284
285
|
return SubImageLayoutService(
|
|
285
286
|
sub_image_detector=detector,
|
|
286
287
|
sub_image_names=[LayoutType.TABLE, LayoutType.TABLE_ROTATED],
|
|
287
|
-
category_id_mapping=None,
|
|
288
288
|
detect_result_generator=detect_result_generator,
|
|
289
289
|
padder=padder,
|
|
290
290
|
)
|
|
@@ -405,7 +405,6 @@ class ServiceFactory:
|
|
|
405
405
|
tile_table_with_items=config.SEGMENTATION.FULL_TABLE_TILING,
|
|
406
406
|
remove_iou_threshold_rows=config.SEGMENTATION.REMOVE_IOU_THRESHOLD_ROWS,
|
|
407
407
|
remove_iou_threshold_cols=config.SEGMENTATION.REMOVE_IOU_THRESHOLD_COLS,
|
|
408
|
-
cell_class_id=config.SEGMENTATION.CELL_CATEGORY_ID,
|
|
409
408
|
table_name=config.SEGMENTATION.TABLE_NAME,
|
|
410
409
|
cell_names=config.SEGMENTATION.PUBTABLES_CELL_NAMES,
|
|
411
410
|
spanning_cell_names=config.SEGMENTATION.PUBTABLES_SPANNING_CELL_NAMES,
|
|
@@ -516,6 +515,15 @@ class ServiceFactory:
|
|
|
516
515
|
"""
|
|
517
516
|
return ServiceFactory._build_pdf_miner_text_service(detector)
|
|
518
517
|
|
|
518
|
+
@staticmethod
|
|
519
|
+
def _build_doctr_word_detector_service(detector: DoctrTextlineDetector) -> ImageLayoutService:
|
|
520
|
+
"""Building a Doctr word detector service
|
|
521
|
+
|
|
522
|
+
:param detector: DoctrTextlineDetector
|
|
523
|
+
:return: ImageLayoutService
|
|
524
|
+
"""
|
|
525
|
+
return ImageLayoutService(layout_detector=detector, to_image=True, crop_image=True)
|
|
526
|
+
|
|
519
527
|
@staticmethod
|
|
520
528
|
def build_doctr_word_detector_service(detector: DoctrTextlineDetector) -> ImageLayoutService:
|
|
521
529
|
"""Building a Doctr word detector service
|
|
@@ -523,9 +531,7 @@ class ServiceFactory:
|
|
|
523
531
|
:param detector: DoctrTextlineDetector
|
|
524
532
|
:return: ImageLayoutService
|
|
525
533
|
"""
|
|
526
|
-
return ImageLayoutService(
|
|
527
|
-
layout_detector=detector, to_image=True, crop_image=True, skip_if_layout_extracted=True
|
|
528
|
-
)
|
|
534
|
+
return ServiceFactory._build_doctr_word_detector_service(detector)
|
|
529
535
|
|
|
530
536
|
@staticmethod
|
|
531
537
|
def _build_text_extraction_service(
|
|
@@ -539,7 +545,6 @@ class ServiceFactory:
|
|
|
539
545
|
"""
|
|
540
546
|
return TextExtractionService(
|
|
541
547
|
detector,
|
|
542
|
-
skip_if_text_extracted=config.USE_PDF_MINER,
|
|
543
548
|
extract_from_roi=config.TEXT_CONTAINER if config.OCR.USE_DOCTR else None,
|
|
544
549
|
)
|
|
545
550
|
|
|
@@ -567,11 +572,16 @@ class ServiceFactory:
|
|
|
567
572
|
threshold=config.WORD_MATCHING.THRESHOLD,
|
|
568
573
|
max_parent_only=config.WORD_MATCHING.MAX_PARENT_ONLY,
|
|
569
574
|
)
|
|
575
|
+
family_compounds = [
|
|
576
|
+
FamilyCompound(
|
|
577
|
+
parent_categories=config.WORD_MATCHING.PARENTAL_CATEGORIES,
|
|
578
|
+
child_categories=config.TEXT_CONTAINER,
|
|
579
|
+
relationship_key=Relationships.CHILD,
|
|
580
|
+
)
|
|
581
|
+
]
|
|
570
582
|
return MatchingService(
|
|
571
|
-
|
|
572
|
-
child_categories=config.TEXT_CONTAINER,
|
|
583
|
+
family_compounds=family_compounds,
|
|
573
584
|
matcher=matcher,
|
|
574
|
-
relationship_key=Relationships.CHILD,
|
|
575
585
|
)
|
|
576
586
|
|
|
577
587
|
@staticmethod
|
|
@@ -591,11 +601,16 @@ class ServiceFactory:
|
|
|
591
601
|
:return: MatchingService
|
|
592
602
|
"""
|
|
593
603
|
neighbor_matcher = NeighbourMatcher()
|
|
604
|
+
family_compounds = [
|
|
605
|
+
FamilyCompound(
|
|
606
|
+
parent_categories=config.LAYOUT_LINK.PARENTAL_CATEGORIES,
|
|
607
|
+
child_categories=config.LAYOUT_LINK.CHILD_CATEGORIES,
|
|
608
|
+
relationship_key=Relationships.LAYOUT_LINK,
|
|
609
|
+
)
|
|
610
|
+
]
|
|
594
611
|
return MatchingService(
|
|
595
|
-
|
|
596
|
-
child_categories=config.LAYOUT_LINK.CHILD_CATEGORIES,
|
|
612
|
+
family_compounds=family_compounds,
|
|
597
613
|
matcher=neighbor_matcher,
|
|
598
|
-
relationship_key=Relationships.LAYOUT_LINK,
|
|
599
614
|
)
|
|
600
615
|
|
|
601
616
|
@staticmethod
|
|
@@ -699,9 +714,11 @@ class ServiceFactory:
|
|
|
699
714
|
table_refinement_service = ServiceFactory.build_table_refinement_service(config)
|
|
700
715
|
pipe_component_list.append(table_refinement_service)
|
|
701
716
|
|
|
717
|
+
d_text_service_id = ""
|
|
702
718
|
if config.USE_PDF_MINER:
|
|
703
719
|
pdf_miner = ServiceFactory.build_pdf_text_detector(config)
|
|
704
720
|
d_text = ServiceFactory.build_pdf_miner_text_service(pdf_miner)
|
|
721
|
+
d_text_service_id = d_text.service_id
|
|
705
722
|
pipe_component_list.append(d_text)
|
|
706
723
|
|
|
707
724
|
# setup ocr
|
|
@@ -710,10 +727,14 @@ class ServiceFactory:
|
|
|
710
727
|
if config.OCR.USE_DOCTR:
|
|
711
728
|
word_detector = ServiceFactory.build_doctr_word_detector(config)
|
|
712
729
|
word_service = ServiceFactory.build_doctr_word_detector_service(word_detector)
|
|
730
|
+
word_service.set_inbound_filter(skip_if_category_or_service_extracted(service_ids=d_text_service_id))
|
|
713
731
|
pipe_component_list.append(word_service)
|
|
714
732
|
|
|
715
733
|
ocr_detector = ServiceFactory.build_ocr_detector(config)
|
|
716
734
|
text_extraction_service = ServiceFactory.build_text_extraction_service(config, ocr_detector)
|
|
735
|
+
text_extraction_service.set_inbound_filter(
|
|
736
|
+
skip_if_category_or_service_extracted(service_ids=d_text_service_id)
|
|
737
|
+
)
|
|
717
738
|
pipe_component_list.append(text_extraction_service)
|
|
718
739
|
|
|
719
740
|
if config.USE_PDF_MINER or config.USE_OCR:
|
|
@@ -342,7 +342,7 @@ class Image:
|
|
|
342
342
|
self,
|
|
343
343
|
category_names: Optional[Union[str, ObjectTypes, Sequence[Union[str, ObjectTypes]]]] = None,
|
|
344
344
|
annotation_ids: Optional[Union[str, Sequence[str]]] = None,
|
|
345
|
-
|
|
345
|
+
service_ids: Optional[Union[str, Sequence[str]]] = None,
|
|
346
346
|
model_id: Optional[Union[str, Sequence[str]]] = None,
|
|
347
347
|
session_ids: Optional[Union[str, Sequence[str]]] = None,
|
|
348
348
|
ignore_inactive: bool = True,
|
|
@@ -356,7 +356,7 @@ class Image:
|
|
|
356
356
|
|
|
357
357
|
:param category_names: A single name or list of names
|
|
358
358
|
:param annotation_ids: A single id or list of ids
|
|
359
|
-
:param
|
|
359
|
+
:param service_ids: A single service name or list of service names
|
|
360
360
|
:param model_id: A single model name or list of model names
|
|
361
361
|
:param session_ids: A single session id or list of session ids
|
|
362
362
|
:param ignore_inactive: If set to `True` only active annotations are returned.
|
|
@@ -372,7 +372,7 @@ class Image:
|
|
|
372
372
|
)
|
|
373
373
|
|
|
374
374
|
ann_ids = [annotation_ids] if isinstance(annotation_ids, str) else annotation_ids
|
|
375
|
-
|
|
375
|
+
service_ids = [service_ids] if isinstance(service_ids, str) else service_ids
|
|
376
376
|
model_id = [model_id] if isinstance(model_id, str) else model_id
|
|
377
377
|
session_id = [session_ids] if isinstance(session_ids, str) else session_ids
|
|
378
378
|
|
|
@@ -387,8 +387,8 @@ class Image:
|
|
|
387
387
|
if ann_ids is not None:
|
|
388
388
|
anns = filter(lambda x: x.annotation_id in ann_ids, anns)
|
|
389
389
|
|
|
390
|
-
if
|
|
391
|
-
anns = filter(lambda x: x.service_id in
|
|
390
|
+
if service_ids is not None:
|
|
391
|
+
anns = filter(lambda x: x.service_id in service_ids, anns)
|
|
392
392
|
|
|
393
393
|
if model_id is not None:
|
|
394
394
|
anns = filter(lambda x: x.model_id in model_id, anns)
|
|
@@ -659,7 +659,7 @@ class Page(Image):
|
|
|
659
659
|
self,
|
|
660
660
|
category_names: Optional[Union[str, ObjectTypes, Sequence[Union[str, ObjectTypes]]]] = None,
|
|
661
661
|
annotation_ids: Optional[Union[str, Sequence[str]]] = None,
|
|
662
|
-
|
|
662
|
+
service_ids: Optional[Union[str, Sequence[str]]] = None,
|
|
663
663
|
model_id: Optional[Union[str, Sequence[str]]] = None,
|
|
664
664
|
session_ids: Optional[Union[str, Sequence[str]]] = None,
|
|
665
665
|
ignore_inactive: bool = True,
|
|
@@ -676,7 +676,7 @@ class Page(Image):
|
|
|
676
676
|
|
|
677
677
|
:param category_names: A single name or list of names
|
|
678
678
|
:param annotation_ids: A single id or list of ids
|
|
679
|
-
:param
|
|
679
|
+
:param service_ids: A single service name or list of service names
|
|
680
680
|
:param model_id: A single model name or list of model names
|
|
681
681
|
:param session_ids: A single session id or list of session ids
|
|
682
682
|
:param ignore_inactive: If set to `True` only active annotations are returned.
|
|
@@ -691,7 +691,7 @@ class Page(Image):
|
|
|
691
691
|
else tuple(get_type(cat_name) for cat_name in category_names)
|
|
692
692
|
)
|
|
693
693
|
ann_ids = [annotation_ids] if isinstance(annotation_ids, str) else annotation_ids
|
|
694
|
-
|
|
694
|
+
service_ids = [service_ids] if isinstance(service_ids, str) else service_ids
|
|
695
695
|
model_id = [model_id] if isinstance(model_id, str) else model_id
|
|
696
696
|
session_id = [session_ids] if isinstance(session_ids, str) else session_ids
|
|
697
697
|
|
|
@@ -706,8 +706,8 @@ class Page(Image):
|
|
|
706
706
|
if ann_ids is not None:
|
|
707
707
|
anns = filter(lambda x: x.annotation_id in ann_ids, anns)
|
|
708
708
|
|
|
709
|
-
if
|
|
710
|
-
anns = filter(lambda x: x.generating_service in
|
|
709
|
+
if service_ids is not None:
|
|
710
|
+
anns = filter(lambda x: x.generating_service in service_ids, anns)
|
|
711
711
|
|
|
712
712
|
if model_id is not None:
|
|
713
713
|
anns = filter(lambda x: x.generating_model in model_id, anns)
|
|
@@ -34,13 +34,15 @@ from ..utils.settings import TypeOrStr
|
|
|
34
34
|
|
|
35
35
|
def match_anns_by_intersection(
|
|
36
36
|
dp: Image,
|
|
37
|
-
parent_ann_category_names: Union[TypeOrStr, Sequence[TypeOrStr]],
|
|
38
|
-
child_ann_category_names: Union[TypeOrStr, Sequence[TypeOrStr]],
|
|
39
37
|
matching_rule: Literal["iou", "ioa"],
|
|
40
38
|
threshold: float,
|
|
41
39
|
use_weighted_intersections: bool = False,
|
|
40
|
+
parent_ann_category_names: Optional[Union[TypeOrStr, Sequence[TypeOrStr]]] = None,
|
|
41
|
+
child_ann_category_names: Optional[Union[TypeOrStr, Sequence[TypeOrStr]]] = None,
|
|
42
42
|
parent_ann_ids: Optional[Union[Sequence[str], str]] = None,
|
|
43
43
|
child_ann_ids: Optional[Union[str, Sequence[str]]] = None,
|
|
44
|
+
parent_ann_service_ids: Optional[Union[str, Sequence[str]]] = None,
|
|
45
|
+
child_ann_service_ids: Optional[Union[str, Sequence[str]]] = None,
|
|
44
46
|
max_parent_only: bool = False,
|
|
45
47
|
) -> tuple[Any, Any, Sequence[ImageAnnotation], Sequence[ImageAnnotation]]:
|
|
46
48
|
"""
|
|
@@ -87,13 +89,19 @@ def match_anns_by_intersection(
|
|
|
87
89
|
dates which are not in the list.
|
|
88
90
|
:param child_ann_ids: Additional filter condition. If some ids are selected, it will ignore all other children
|
|
89
91
|
candidates which are not in the list.
|
|
92
|
+
:param parent_ann_service_ids: Additional filter condition. If some ids are selected, it will ignore all other
|
|
93
|
+
parent candidates which are not in the list.
|
|
94
|
+
:param child_ann_service_ids: Additional filter condition. If some ids are selected, it will ignore all other
|
|
95
|
+
children candidates which are not in the list.
|
|
90
96
|
:param max_parent_only: Will assign to each child at most one parent with maximum ioa
|
|
91
97
|
:return: child indices, parent indices (see Example), list of parent ids and list of children ids.
|
|
92
98
|
"""
|
|
93
99
|
|
|
94
100
|
assert matching_rule in ["iou", "ioa"], "matching rule must be either iou or ioa"
|
|
95
101
|
|
|
96
|
-
child_anns = dp.get_annotation(
|
|
102
|
+
child_anns = dp.get_annotation(
|
|
103
|
+
annotation_ids=child_ann_ids, category_names=child_ann_category_names, service_ids=child_ann_service_ids
|
|
104
|
+
)
|
|
97
105
|
child_ann_boxes = np.array(
|
|
98
106
|
[
|
|
99
107
|
ann.get_bounding_box(dp.image_id).transform(dp.width, dp.height, absolute_coords=True).to_list(mode="xyxy")
|
|
@@ -101,7 +109,9 @@ def match_anns_by_intersection(
|
|
|
101
109
|
]
|
|
102
110
|
)
|
|
103
111
|
|
|
104
|
-
parent_anns = dp.get_annotation(
|
|
112
|
+
parent_anns = dp.get_annotation(
|
|
113
|
+
annotation_ids=parent_ann_ids, category_names=parent_ann_category_names, service_ids=parent_ann_service_ids
|
|
114
|
+
)
|
|
105
115
|
parent_ann_boxes = np.array(
|
|
106
116
|
[
|
|
107
117
|
ann.get_bounding_box(dp.image_id).transform(dp.width, dp.height, absolute_coords=True).to_list(mode="xyxy")
|
|
@@ -147,10 +157,12 @@ def match_anns_by_intersection(
|
|
|
147
157
|
|
|
148
158
|
def match_anns_by_distance(
|
|
149
159
|
dp: Image,
|
|
150
|
-
parent_ann_category_names:
|
|
151
|
-
child_ann_category_names:
|
|
160
|
+
parent_ann_category_names: Optional[Union[TypeOrStr, Sequence[TypeOrStr]]]=None,
|
|
161
|
+
child_ann_category_names: Optional[Union[TypeOrStr, Sequence[TypeOrStr]]]=None,
|
|
152
162
|
parent_ann_ids: Optional[Union[Sequence[str], str]] = None,
|
|
153
163
|
child_ann_ids: Optional[Union[str, Sequence[str]]] = None,
|
|
164
|
+
parent_ann_service_ids: Optional[Union[str, Sequence[str]]] = None,
|
|
165
|
+
child_ann_service_ids: Optional[Union[str, Sequence[str]]] = None,
|
|
154
166
|
) -> list[tuple[ImageAnnotation, ImageAnnotation]]:
|
|
155
167
|
"""
|
|
156
168
|
Generates pairs of parent and child annotations by calculating the euclidean distance between the centers of the
|
|
@@ -164,11 +176,19 @@ def match_anns_by_distance(
|
|
|
164
176
|
dates which are not in the list.
|
|
165
177
|
:param child_ann_ids: Additional filter condition. If some ids are selected, it will ignore all other children
|
|
166
178
|
candidates which are not in the list.
|
|
179
|
+
:param parent_ann_service_ids: Additional filter condition. If some ids are selected, it will ignore all other
|
|
180
|
+
parent candidates which are not in the list.
|
|
181
|
+
:param child_ann_service_ids: Additional filter condition. If some ids are selected, it will ignore all other
|
|
182
|
+
children candidates which are not in the list.
|
|
167
183
|
:return:
|
|
168
184
|
"""
|
|
169
185
|
|
|
170
|
-
parent_anns = dp.get_annotation(
|
|
171
|
-
|
|
186
|
+
parent_anns = dp.get_annotation(
|
|
187
|
+
annotation_ids=parent_ann_ids, category_names=parent_ann_category_names, service_ids=parent_ann_service_ids
|
|
188
|
+
)
|
|
189
|
+
child_anns = dp.get_annotation(
|
|
190
|
+
annotation_ids=child_ann_ids, category_names=child_ann_category_names, service_ids=child_ann_service_ids
|
|
191
|
+
)
|
|
172
192
|
child_centers = [block.get_bounding_box(dp.image_id).center for block in child_anns]
|
|
173
193
|
parent_centers = [block.get_bounding_box(dp.image_id).center for block in parent_anns]
|
|
174
194
|
if child_centers and parent_centers:
|
|
@@ -75,27 +75,6 @@ class DatapointManager:
|
|
|
75
75
|
"""
|
|
76
76
|
assert self.datapoint_is_passed, "Pass datapoint to DatapointManager before creating anns"
|
|
77
77
|
|
|
78
|
-
def maybe_map_category_id(self, category_id: Union[str, int]) -> int:
|
|
79
|
-
"""
|
|
80
|
-
Maps categories if a category id mapping is provided in `__init__`.
|
|
81
|
-
|
|
82
|
-
:param category_id: category id via integer or string.
|
|
83
|
-
:return: mapped category id
|
|
84
|
-
"""
|
|
85
|
-
if self.category_id_mapping is None:
|
|
86
|
-
return int(category_id)
|
|
87
|
-
return self.category_id_mapping[int(category_id)]
|
|
88
|
-
|
|
89
|
-
def set_category_id_mapping(self, category_id_mapping: Mapping[int, int]) -> None:
|
|
90
|
-
"""
|
|
91
|
-
In many cases the category ids sent back from a model have to be modified. Pass a mapping from model
|
|
92
|
-
category ids to target annotation category ids.
|
|
93
|
-
|
|
94
|
-
:param category_id_mapping: A mapping of model category ids (sent from DetectionResult) to category ids (saved
|
|
95
|
-
in annotations)
|
|
96
|
-
"""
|
|
97
|
-
self.category_id_mapping = category_id_mapping
|
|
98
|
-
|
|
99
78
|
def set_image_annotation(
|
|
100
79
|
self,
|
|
101
80
|
detect_result: DetectionResult,
|
|
@@ -127,13 +106,10 @@ class DatapointManager:
|
|
|
127
106
|
:return: the annotation_id of the generated image annotation
|
|
128
107
|
"""
|
|
129
108
|
self.assert_datapoint_passed()
|
|
130
|
-
if detect_result.class_id is None:
|
|
131
|
-
raise ValueError("class_id of detect_result cannot be None")
|
|
132
109
|
if not isinstance(detect_result.box, (list, np.ndarray)):
|
|
133
110
|
raise TypeError(
|
|
134
111
|
f"detect_result.box must be of type list or np.ndarray, but is of type {(type(detect_result.box))}"
|
|
135
112
|
)
|
|
136
|
-
detect_result.class_id = self.maybe_map_category_id(detect_result.class_id)
|
|
137
113
|
with MappingContextManager(
|
|
138
114
|
dp_name=self.datapoint.file_name, filter_level="annotation", detect_result=asdict(detect_result)
|
|
139
115
|
) as annotation_context:
|
|
@@ -155,7 +131,7 @@ class DatapointManager:
|
|
|
155
131
|
ann = ImageAnnotation(
|
|
156
132
|
category_name=detect_result.class_name,
|
|
157
133
|
bounding_box=box,
|
|
158
|
-
category_id=detect_result.class_id,
|
|
134
|
+
category_id=detect_result.class_id if detect_result.class_id is not None else DEFAULT_CATEGORY_ID,
|
|
159
135
|
score=detect_result.score,
|
|
160
136
|
service_id=self.service_id,
|
|
161
137
|
model_id=self.model_id,
|
|
@@ -22,6 +22,7 @@ from __future__ import annotations
|
|
|
22
22
|
|
|
23
23
|
import os
|
|
24
24
|
from copy import deepcopy
|
|
25
|
+
from dataclasses import dataclass, field
|
|
25
26
|
from typing import Literal, Mapping, Optional, Sequence, Union
|
|
26
27
|
|
|
27
28
|
import numpy as np
|
|
@@ -49,24 +50,30 @@ class ImageCroppingService(PipelineComponent):
|
|
|
49
50
|
generally not stored.
|
|
50
51
|
"""
|
|
51
52
|
|
|
52
|
-
def __init__(
|
|
53
|
+
def __init__(
|
|
54
|
+
self, category_names: Optional[Union[TypeOrStr, Sequence[TypeOrStr]]] = None,
|
|
55
|
+
service_ids: Optional[Sequence[str]] = None
|
|
56
|
+
) -> None:
|
|
53
57
|
"""
|
|
54
58
|
:param category_names: A single name or a list of category names to crop
|
|
55
59
|
"""
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
60
|
+
if category_names is None:
|
|
61
|
+
self.category_names = None
|
|
62
|
+
else:
|
|
63
|
+
self.category_names = (
|
|
64
|
+
(category_names,)
|
|
65
|
+
if isinstance(category_names, str)
|
|
66
|
+
else tuple(get_type(category_name) for category_name in category_names)
|
|
67
|
+
)
|
|
68
|
+
self.service_ids = service_ids
|
|
62
69
|
super().__init__("image_crop")
|
|
63
70
|
|
|
64
71
|
def serve(self, dp: Image) -> None:
|
|
65
|
-
for ann in dp.get_annotation(category_names=self.category_names):
|
|
72
|
+
for ann in dp.get_annotation(category_names=self.category_names, service_ids=self.service_ids):
|
|
66
73
|
dp.image_ann_to_image(ann.annotation_id, crop_image=True)
|
|
67
74
|
|
|
68
75
|
def clone(self) -> ImageCroppingService:
|
|
69
|
-
return self.__class__(self.category_names)
|
|
76
|
+
return self.__class__(self.category_names, self.service_ids)
|
|
70
77
|
|
|
71
78
|
def get_meta_annotation(self) -> MetaAnnotation:
|
|
72
79
|
return MetaAnnotation(image_annotations=(), sub_categories={}, relationships={}, summaries=())
|
|
@@ -124,8 +131,10 @@ class IntersectionMatcher:
|
|
|
124
131
|
def match(
|
|
125
132
|
self,
|
|
126
133
|
dp: Image,
|
|
127
|
-
parent_categories: Union[TypeOrStr, Sequence[TypeOrStr]],
|
|
128
|
-
child_categories: Union[TypeOrStr, Sequence[TypeOrStr]],
|
|
134
|
+
parent_categories: Optional[Union[TypeOrStr, Sequence[TypeOrStr]]] = None,
|
|
135
|
+
child_categories: Optional[Union[TypeOrStr, Sequence[TypeOrStr]]] = None,
|
|
136
|
+
parent_ann_service_ids: Optional[Union[str, Sequence[str]]] = None,
|
|
137
|
+
child_ann_service_ids: Optional[Union[str, Sequence[str]]] = None,
|
|
129
138
|
) -> list[tuple[str, str]]:
|
|
130
139
|
"""
|
|
131
140
|
The matching algorithm
|
|
@@ -133,6 +142,10 @@ class IntersectionMatcher:
|
|
|
133
142
|
:param dp: datapoint image
|
|
134
143
|
:param parent_categories: list of categories to be used a for parent class. Will generate a child-relationship
|
|
135
144
|
:param child_categories: list of categories to be used for a child class.
|
|
145
|
+
:param parent_ann_service_ids: Additional filter condition. If some ids are selected, it will ignore all other
|
|
146
|
+
parent candidates which are not in the list.
|
|
147
|
+
:param child_ann_service_ids: Additional filter condition. If some ids are selected, it will ignore all other
|
|
148
|
+
children candidates which are not in the list.
|
|
136
149
|
|
|
137
150
|
:return: A list of tuples with parent and child annotation ids
|
|
138
151
|
"""
|
|
@@ -144,6 +157,8 @@ class IntersectionMatcher:
|
|
|
144
157
|
threshold=self.threshold,
|
|
145
158
|
use_weighted_intersections=self.use_weighted_intersections,
|
|
146
159
|
max_parent_only=self.max_parent_only,
|
|
160
|
+
parent_ann_service_ids=parent_ann_service_ids,
|
|
161
|
+
child_ann_service_ids=child_ann_service_ids,
|
|
147
162
|
)
|
|
148
163
|
|
|
149
164
|
matched_child_anns = np.take(child_anns, child_index) # type: ignore
|
|
@@ -174,8 +189,10 @@ class NeighbourMatcher:
|
|
|
174
189
|
def match(
|
|
175
190
|
self,
|
|
176
191
|
dp: Image,
|
|
177
|
-
parent_categories: Union[TypeOrStr, Sequence[TypeOrStr]],
|
|
178
|
-
child_categories: Union[TypeOrStr, Sequence[TypeOrStr]],
|
|
192
|
+
parent_categories: Optional[Union[TypeOrStr, Sequence[TypeOrStr]]] = None,
|
|
193
|
+
child_categories: Optional[Union[TypeOrStr, Sequence[TypeOrStr]]] = None,
|
|
194
|
+
parent_ann_service_ids: Optional[Union[str, Sequence[str]]] = None,
|
|
195
|
+
child_ann_service_ids: Optional[Union[str, Sequence[str]]] = None,
|
|
179
196
|
) -> list[tuple[str, str]]:
|
|
180
197
|
"""
|
|
181
198
|
The matching algorithm
|
|
@@ -183,16 +200,54 @@ class NeighbourMatcher:
|
|
|
183
200
|
:param dp: datapoint image
|
|
184
201
|
:param parent_categories: list of categories to be used a for parent class. Will generate a child-relationship
|
|
185
202
|
:param child_categories: list of categories to be used for a child class.
|
|
203
|
+
:param parent_ann_service_ids: Additional filter condition. If some ids are selected, it will ignore all other
|
|
204
|
+
parent candidates which are not in the list.
|
|
205
|
+
:param child_ann_service_ids: Additional filter condition. If some ids are selected, it will ignore all other
|
|
206
|
+
children candidates which are not in the list.
|
|
186
207
|
|
|
187
208
|
:return: A list of tuples with parent and child annotation ids
|
|
188
209
|
"""
|
|
189
210
|
|
|
190
211
|
return [
|
|
191
212
|
(pair[0].annotation_id, pair[1].annotation_id)
|
|
192
|
-
for pair in match_anns_by_distance(
|
|
213
|
+
for pair in match_anns_by_distance(
|
|
214
|
+
dp,
|
|
215
|
+
parent_ann_category_names=parent_categories,
|
|
216
|
+
child_ann_category_names=child_categories,
|
|
217
|
+
parent_ann_service_ids=parent_ann_service_ids,
|
|
218
|
+
child_ann_service_ids=child_ann_service_ids,
|
|
219
|
+
)
|
|
193
220
|
]
|
|
194
221
|
|
|
195
222
|
|
|
223
|
+
@dataclass
|
|
224
|
+
class FamilyCompound:
|
|
225
|
+
"""
|
|
226
|
+
A family compound is a set of parent and child categories that are related by a relationship key. The parent
|
|
227
|
+
categories will receive a relationship to the child categories.
|
|
228
|
+
"""
|
|
229
|
+
|
|
230
|
+
relationship_key: Relationships
|
|
231
|
+
parent_categories: Optional[Union[ObjectTypes, Sequence[ObjectTypes]]] = field(default=None)
|
|
232
|
+
child_categories: Optional[Union[ObjectTypes, Sequence[ObjectTypes]]] = field(default=None)
|
|
233
|
+
parent_ann_service_ids: Optional[Union[str, Sequence[str]]] = field(default=None)
|
|
234
|
+
child_ann_service_ids: Optional[Union[str, Sequence[str]]] = field(default=None)
|
|
235
|
+
|
|
236
|
+
def __post_init__(self) -> None:
|
|
237
|
+
if isinstance(self.parent_categories, str):
|
|
238
|
+
self.parent_categories = (get_type(self.parent_categories),)
|
|
239
|
+
elif self.parent_categories is not None:
|
|
240
|
+
self.parent_categories = tuple(get_type(parent) for parent in self.parent_categories)
|
|
241
|
+
if isinstance(self.child_categories, str):
|
|
242
|
+
self.child_categories = (get_type(self.child_categories),)
|
|
243
|
+
elif self.child_categories is not None:
|
|
244
|
+
self.child_categories = tuple(get_type(child) for child in self.child_categories)
|
|
245
|
+
if isinstance(self.parent_ann_service_ids, str):
|
|
246
|
+
self.parent_ann_service_ids = (self.parent_ann_service_ids,)
|
|
247
|
+
if isinstance(self.child_ann_service_ids, str):
|
|
248
|
+
self.child_ann_service_ids = (self.child_ann_service_ids,)
|
|
249
|
+
|
|
250
|
+
|
|
196
251
|
@pipeline_component_registry.register("MatchingService")
|
|
197
252
|
class MatchingService(PipelineComponent):
|
|
198
253
|
"""
|
|
@@ -202,28 +257,15 @@ class MatchingService(PipelineComponent):
|
|
|
202
257
|
|
|
203
258
|
def __init__(
|
|
204
259
|
self,
|
|
205
|
-
|
|
206
|
-
child_categories: Union[TypeOrStr, Sequence[TypeOrStr]],
|
|
260
|
+
family_compounds: Sequence[FamilyCompound],
|
|
207
261
|
matcher: Union[IntersectionMatcher, NeighbourMatcher],
|
|
208
|
-
relationship_key: Relationships,
|
|
209
262
|
) -> None:
|
|
210
263
|
"""
|
|
211
|
-
:param
|
|
212
|
-
:param
|
|
213
|
-
|
|
264
|
+
:param family_compounds: A list of FamilyCompounds
|
|
265
|
+
:param matcher: A matcher object
|
|
214
266
|
"""
|
|
215
|
-
self.
|
|
216
|
-
(get_type(parent_categories),)
|
|
217
|
-
if isinstance(parent_categories, str)
|
|
218
|
-
else tuple(get_type(category_name) for category_name in parent_categories)
|
|
219
|
-
)
|
|
220
|
-
self.child_categories = (
|
|
221
|
-
(get_type(child_categories),)
|
|
222
|
-
if isinstance(child_categories, str)
|
|
223
|
-
else (tuple(get_type(category_name) for category_name in child_categories))
|
|
224
|
-
)
|
|
267
|
+
self.family_compounds = family_compounds
|
|
225
268
|
self.matcher = matcher
|
|
226
|
-
self.relationship_key = relationship_key
|
|
227
269
|
super().__init__("matching")
|
|
228
270
|
|
|
229
271
|
def serve(self, dp: Image) -> None:
|
|
@@ -233,20 +275,31 @@ class MatchingService(PipelineComponent):
|
|
|
233
275
|
|
|
234
276
|
:param dp: datapoint image
|
|
235
277
|
"""
|
|
236
|
-
|
|
237
|
-
|
|
238
|
-
|
|
239
|
-
|
|
240
|
-
|
|
278
|
+
for family_compound in self.family_compounds:
|
|
279
|
+
matched_pairs = self.matcher.match(
|
|
280
|
+
dp,
|
|
281
|
+
parent_categories=family_compound.parent_categories,
|
|
282
|
+
child_categories=family_compound.child_categories,
|
|
283
|
+
parent_ann_service_ids=family_compound.parent_ann_service_ids,
|
|
284
|
+
child_ann_service_ids=family_compound.child_ann_service_ids,
|
|
285
|
+
)
|
|
286
|
+
|
|
287
|
+
for pair in matched_pairs:
|
|
288
|
+
self.dp_manager.set_relationship_annotation(family_compound.relationship_key, pair[0], pair[1])
|
|
241
289
|
|
|
242
290
|
def clone(self) -> PipelineComponent:
|
|
243
|
-
return self.__class__(self.
|
|
291
|
+
return self.__class__(self.family_compounds, self.matcher)
|
|
244
292
|
|
|
245
293
|
def get_meta_annotation(self) -> MetaAnnotation:
|
|
294
|
+
relationships: dict[ObjectTypes, set[ObjectTypes]] = {}
|
|
295
|
+
for family_compound in self.family_compounds:
|
|
296
|
+
if family_compound.parent_categories is not None:
|
|
297
|
+
for parent_category in family_compound.parent_categories:
|
|
298
|
+
relationships[parent_category] = {family_compound.relationship_key} # type: ignore
|
|
246
299
|
return MetaAnnotation(
|
|
247
300
|
image_annotations=(),
|
|
248
301
|
sub_categories={},
|
|
249
|
-
relationships=
|
|
302
|
+
relationships=relationships,
|
|
250
303
|
summaries=(),
|
|
251
304
|
)
|
|
252
305
|
|