deepdoctection 0.31__py3-none-any.whl → 0.33__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of deepdoctection might be problematic. Click here for more details.

Files changed (131) hide show
  1. deepdoctection/__init__.py +16 -29
  2. deepdoctection/analyzer/dd.py +70 -59
  3. deepdoctection/configs/conf_dd_one.yaml +34 -31
  4. deepdoctection/dataflow/common.py +9 -5
  5. deepdoctection/dataflow/custom.py +5 -5
  6. deepdoctection/dataflow/custom_serialize.py +75 -18
  7. deepdoctection/dataflow/parallel_map.py +3 -3
  8. deepdoctection/dataflow/serialize.py +4 -4
  9. deepdoctection/dataflow/stats.py +3 -3
  10. deepdoctection/datapoint/annotation.py +41 -56
  11. deepdoctection/datapoint/box.py +9 -8
  12. deepdoctection/datapoint/convert.py +6 -6
  13. deepdoctection/datapoint/image.py +56 -44
  14. deepdoctection/datapoint/view.py +245 -150
  15. deepdoctection/datasets/__init__.py +1 -4
  16. deepdoctection/datasets/adapter.py +35 -26
  17. deepdoctection/datasets/base.py +14 -12
  18. deepdoctection/datasets/dataflow_builder.py +3 -3
  19. deepdoctection/datasets/info.py +24 -26
  20. deepdoctection/datasets/instances/doclaynet.py +51 -51
  21. deepdoctection/datasets/instances/fintabnet.py +46 -46
  22. deepdoctection/datasets/instances/funsd.py +25 -24
  23. deepdoctection/datasets/instances/iiitar13k.py +13 -10
  24. deepdoctection/datasets/instances/layouttest.py +4 -3
  25. deepdoctection/datasets/instances/publaynet.py +5 -5
  26. deepdoctection/datasets/instances/pubtables1m.py +24 -21
  27. deepdoctection/datasets/instances/pubtabnet.py +32 -30
  28. deepdoctection/datasets/instances/rvlcdip.py +30 -30
  29. deepdoctection/datasets/instances/xfund.py +26 -26
  30. deepdoctection/datasets/save.py +6 -6
  31. deepdoctection/eval/__init__.py +1 -4
  32. deepdoctection/eval/accmetric.py +32 -33
  33. deepdoctection/eval/base.py +8 -9
  34. deepdoctection/eval/cocometric.py +15 -13
  35. deepdoctection/eval/eval.py +41 -37
  36. deepdoctection/eval/tedsmetric.py +30 -23
  37. deepdoctection/eval/tp_eval_callback.py +16 -19
  38. deepdoctection/extern/__init__.py +2 -7
  39. deepdoctection/extern/base.py +339 -134
  40. deepdoctection/extern/d2detect.py +85 -113
  41. deepdoctection/extern/deskew.py +14 -11
  42. deepdoctection/extern/doctrocr.py +141 -130
  43. deepdoctection/extern/fastlang.py +27 -18
  44. deepdoctection/extern/hfdetr.py +71 -62
  45. deepdoctection/extern/hflayoutlm.py +504 -211
  46. deepdoctection/extern/hflm.py +230 -0
  47. deepdoctection/extern/model.py +488 -302
  48. deepdoctection/extern/pdftext.py +23 -19
  49. deepdoctection/extern/pt/__init__.py +1 -3
  50. deepdoctection/extern/pt/nms.py +6 -2
  51. deepdoctection/extern/pt/ptutils.py +29 -19
  52. deepdoctection/extern/tessocr.py +39 -38
  53. deepdoctection/extern/texocr.py +18 -18
  54. deepdoctection/extern/tp/tfutils.py +57 -9
  55. deepdoctection/extern/tp/tpcompat.py +21 -14
  56. deepdoctection/extern/tp/tpfrcnn/__init__.py +20 -0
  57. deepdoctection/extern/tp/tpfrcnn/common.py +7 -3
  58. deepdoctection/extern/tp/tpfrcnn/config/__init__.py +20 -0
  59. deepdoctection/extern/tp/tpfrcnn/config/config.py +13 -10
  60. deepdoctection/extern/tp/tpfrcnn/modeling/__init__.py +20 -0
  61. deepdoctection/extern/tp/tpfrcnn/modeling/backbone.py +18 -8
  62. deepdoctection/extern/tp/tpfrcnn/modeling/generalized_rcnn.py +12 -6
  63. deepdoctection/extern/tp/tpfrcnn/modeling/model_box.py +14 -9
  64. deepdoctection/extern/tp/tpfrcnn/modeling/model_cascade.py +8 -5
  65. deepdoctection/extern/tp/tpfrcnn/modeling/model_fpn.py +22 -17
  66. deepdoctection/extern/tp/tpfrcnn/modeling/model_frcnn.py +21 -14
  67. deepdoctection/extern/tp/tpfrcnn/modeling/model_mrcnn.py +19 -11
  68. deepdoctection/extern/tp/tpfrcnn/modeling/model_rpn.py +15 -10
  69. deepdoctection/extern/tp/tpfrcnn/predict.py +9 -4
  70. deepdoctection/extern/tp/tpfrcnn/preproc.py +12 -8
  71. deepdoctection/extern/tp/tpfrcnn/utils/__init__.py +20 -0
  72. deepdoctection/extern/tp/tpfrcnn/utils/box_ops.py +10 -2
  73. deepdoctection/extern/tpdetect.py +45 -53
  74. deepdoctection/mapper/__init__.py +3 -8
  75. deepdoctection/mapper/cats.py +27 -29
  76. deepdoctection/mapper/cocostruct.py +10 -10
  77. deepdoctection/mapper/d2struct.py +27 -26
  78. deepdoctection/mapper/hfstruct.py +13 -8
  79. deepdoctection/mapper/laylmstruct.py +178 -37
  80. deepdoctection/mapper/maputils.py +12 -11
  81. deepdoctection/mapper/match.py +2 -2
  82. deepdoctection/mapper/misc.py +11 -9
  83. deepdoctection/mapper/pascalstruct.py +4 -4
  84. deepdoctection/mapper/prodigystruct.py +5 -5
  85. deepdoctection/mapper/pubstruct.py +84 -92
  86. deepdoctection/mapper/tpstruct.py +5 -5
  87. deepdoctection/mapper/xfundstruct.py +33 -33
  88. deepdoctection/pipe/__init__.py +1 -1
  89. deepdoctection/pipe/anngen.py +12 -14
  90. deepdoctection/pipe/base.py +52 -106
  91. deepdoctection/pipe/common.py +72 -59
  92. deepdoctection/pipe/concurrency.py +16 -11
  93. deepdoctection/pipe/doctectionpipe.py +24 -21
  94. deepdoctection/pipe/language.py +20 -25
  95. deepdoctection/pipe/layout.py +20 -16
  96. deepdoctection/pipe/lm.py +75 -105
  97. deepdoctection/pipe/order.py +194 -89
  98. deepdoctection/pipe/refine.py +111 -124
  99. deepdoctection/pipe/segment.py +156 -161
  100. deepdoctection/pipe/{cell.py → sub_layout.py} +50 -40
  101. deepdoctection/pipe/text.py +37 -36
  102. deepdoctection/pipe/transform.py +19 -16
  103. deepdoctection/train/__init__.py +6 -12
  104. deepdoctection/train/d2_frcnn_train.py +48 -41
  105. deepdoctection/train/hf_detr_train.py +41 -30
  106. deepdoctection/train/hf_layoutlm_train.py +153 -135
  107. deepdoctection/train/tp_frcnn_train.py +32 -31
  108. deepdoctection/utils/concurrency.py +1 -1
  109. deepdoctection/utils/context.py +13 -6
  110. deepdoctection/utils/develop.py +4 -4
  111. deepdoctection/utils/env_info.py +87 -125
  112. deepdoctection/utils/file_utils.py +6 -11
  113. deepdoctection/utils/fs.py +22 -18
  114. deepdoctection/utils/identifier.py +2 -2
  115. deepdoctection/utils/logger.py +16 -15
  116. deepdoctection/utils/metacfg.py +7 -7
  117. deepdoctection/utils/mocks.py +93 -0
  118. deepdoctection/utils/pdf_utils.py +11 -11
  119. deepdoctection/utils/settings.py +185 -181
  120. deepdoctection/utils/tqdm.py +1 -1
  121. deepdoctection/utils/transform.py +14 -9
  122. deepdoctection/utils/types.py +104 -0
  123. deepdoctection/utils/utils.py +7 -7
  124. deepdoctection/utils/viz.py +74 -72
  125. {deepdoctection-0.31.dist-info → deepdoctection-0.33.dist-info}/METADATA +30 -21
  126. deepdoctection-0.33.dist-info/RECORD +146 -0
  127. {deepdoctection-0.31.dist-info → deepdoctection-0.33.dist-info}/WHEEL +1 -1
  128. deepdoctection/utils/detection_types.py +0 -68
  129. deepdoctection-0.31.dist-info/RECORD +0 -144
  130. {deepdoctection-0.31.dist-info → deepdoctection-0.33.dist-info}/LICENSE +0 -0
  131. {deepdoctection-0.31.dist-info → deepdoctection-0.33.dist-info}/top_level.txt +0 -0
@@ -18,11 +18,12 @@
18
18
  """
19
19
  Module for funcs and constants that maintain general settings
20
20
  """
21
+ from __future__ import annotations
21
22
 
22
23
  import os
23
24
  from enum import Enum
24
25
  from pathlib import Path
25
- from typing import Dict, List, Optional, Tuple, Union
26
+ from typing import Optional, Union
26
27
 
27
28
  import catalogue # type: ignore
28
29
 
@@ -34,7 +35,7 @@ class ObjectTypes(str, Enum):
34
35
  return f"<{self.__class__.__name__}.{self.name}>"
35
36
 
36
37
  @classmethod
37
- def from_value(cls, value: str) -> "ObjectTypes":
38
+ def from_value(cls, value: str) -> ObjectTypes:
38
39
  """Getting the enum member from a given string value
39
40
 
40
41
  :param value: string value to get the enum member
@@ -56,262 +57,265 @@ object_types_registry = catalogue.create("deepdoctection", "settings", entry_poi
56
57
  class DefaultType(ObjectTypes):
57
58
  """Type for default member"""
58
59
 
59
- default_type = "default_type"
60
+ DEFAULT_TYPE = "default_type"
60
61
 
61
62
 
62
63
  @object_types_registry.register("PageType")
63
64
  class PageType(ObjectTypes):
64
65
  """Type for document page properties"""
65
66
 
66
- document_type = "document_type"
67
- language = "language"
68
- angle = "angle"
67
+ DOCUMENT_TYPE = "document_type"
68
+ LANGUAGE = "language"
69
+ ANGLE = "angle"
69
70
 
70
71
 
71
72
  @object_types_registry.register("SummaryType")
72
73
  class SummaryType(ObjectTypes):
73
74
  """Summary type member"""
74
75
 
75
- summary = "summary"
76
+ SUMMARY = "summary"
76
77
 
77
78
 
78
79
  @object_types_registry.register("DocumentType")
79
80
  class DocumentType(ObjectTypes):
80
81
  """Document types"""
81
82
 
82
- letter = "letter"
83
- form = "form"
84
- email = "email"
85
- handwritten = "handwritten"
86
- advertisement = "advertisement"
87
- scientific_report = "scientific_report"
88
- scientific_publication = "scientific_publication"
89
- specification = "specification"
90
- file_folder = "file_folder"
91
- news_article = "news_article"
92
- budget = "budget"
93
- invoice = "invoice"
94
- presentation = "presentation"
95
- questionnaire = "questionnaire"
96
- resume = "resume"
97
- memo = "memo"
98
- financial_report = "financial_report"
99
- laws_and_regulations = "laws_and_regulations"
100
- government_tenders = "government_tenders"
101
- manuals = "manuals"
102
- patents = "patents"
83
+ LETTER = "letter"
84
+ FORM = "form"
85
+ EMAIL = "email"
86
+ HANDWRITTEN = "handwritten"
87
+ ADVERTISEMENT = "advertisement"
88
+ SCIENTIFIC_REPORT = "scientific_report"
89
+ SCIENTIFIC_PUBLICATION = "scientific_publication"
90
+ SPECIFICATION = "specification"
91
+ FILE_FOLDER = "file_folder"
92
+ NEWS_ARTICLE = "news_article"
93
+ BUDGET = "budget"
94
+ INVOICE = "invoice"
95
+ PRESENTATION = "presentation"
96
+ QUESTIONNAIRE = "questionnaire"
97
+ RESUME = "resume"
98
+ MEMO = "memo"
99
+ FINANCIAL_REPORT = "financial_report"
100
+ LAWS_AND_REGULATIONS = "laws_and_regulations"
101
+ GOVERNMENT_TENDERS = "government_tenders"
102
+ MANUALS = "manuals"
103
+ PATENTS = "patents"
103
104
 
104
105
 
105
106
  @object_types_registry.register("LayoutType")
106
107
  class LayoutType(ObjectTypes):
107
108
  """Layout types"""
108
109
 
109
- table = "table"
110
- table_rotated = "table_rotated"
111
- figure = "figure"
112
- list = "list"
113
- text = "text"
114
- title = "title" # type: ignore
115
- logo = "logo"
116
- signature = "signature"
117
- caption = "caption"
118
- footnote = "footnote"
119
- formula = "formula"
120
- page_footer = "page_footer"
121
- page_header = "page_header"
122
- section_header = "section_header"
123
- page = "page"
124
- cell = "cell"
125
- row = "row"
126
- column = "column"
127
- word = "word"
128
- line = "line"
129
- background = "background"
110
+ TABLE = "table"
111
+ TABLE_ROTATED = "table_rotated"
112
+ FIGURE = "figure"
113
+ LIST = "list"
114
+ TEXT = "text"
115
+ TITLE = "title"
116
+ LOGO = "logo"
117
+ SIGNATURE = "signature"
118
+ CAPTION = "caption"
119
+ FOOTNOTE = "footnote"
120
+ FORMULA = "formula"
121
+ PAGE_FOOTER = "page_footer"
122
+ PAGE_HEADER = "page_header"
123
+ SECTION_HEADER = "section_header"
124
+ PAGE = "page"
125
+ CELL = "cell"
126
+ ROW = "row"
127
+ COLUMN = "column"
128
+ WORD = "word"
129
+ LINE = "line"
130
+ BACKGROUND = "background"
131
+ PAGE_NUMBER = "page_number"
132
+ KEY_VALUE_AREA = "key_value_area"
130
133
 
131
134
 
132
135
  @object_types_registry.register("TableType")
133
136
  class TableType(ObjectTypes):
134
137
  """Types for table properties"""
135
138
 
136
- item = "item"
137
- number_of_rows = "number_of_rows"
138
- number_of_columns = "number_of_columns"
139
- max_row_span = "max_row_span"
140
- max_col_span = "max_col_span"
141
- html = "html"
139
+ ITEM = "item"
140
+ NUMBER_OF_ROWS = "number_of_rows"
141
+ NUMBER_OF_COLUMNS = "number_of_columns"
142
+ MAX_ROW_SPAN = "max_row_span"
143
+ MAX_COL_SPAN = "max_col_span"
144
+ HTML = "html"
142
145
 
143
146
 
144
147
  @object_types_registry.register("CellType")
145
148
  class CellType(ObjectTypes):
146
149
  """Types for cell properties"""
147
150
 
148
- header = "header"
149
- body = "body"
150
- row_number = "row_number"
151
- row_span = "row_span"
152
- row_header = "row_header"
153
- projected_row_header = "projected_row_header"
154
- column_number = "column_number"
155
- column_span = "column_span"
156
- column_header = "column_header"
157
- spanning = "spanning"
151
+ HEADER = "header"
152
+ BODY = "body"
153
+ ROW_NUMBER = "row_number"
154
+ ROW_SPAN = "row_span"
155
+ ROW_HEADER = "row_header"
156
+ PROJECTED_ROW_HEADER = "projected_row_header"
157
+ COLUMN_NUMBER = "column_number"
158
+ COLUMN_SPAN = "column_span"
159
+ COLUMN_HEADER = "column_header"
160
+ SPANNING = "spanning"
158
161
 
159
162
 
160
163
  @object_types_registry.register("WordType")
161
164
  class WordType(ObjectTypes):
162
165
  """Types for word properties"""
163
166
 
164
- characters = "characters"
165
- block = "block"
166
- token_class = "token_class"
167
- tag = "tag"
168
- token_tag = "token_tag"
169
- text_line = "text_line"
170
- character_type = "character_type"
171
- printed = "printed"
172
- handwritten = "handwritten"
167
+ CHARACTERS = "characters"
168
+ BLOCK = "block"
169
+ TOKEN_CLASS = "token_class"
170
+ TAG = "tag"
171
+ TOKEN_TAG = "token_tag"
172
+ TEXT_LINE = "text_line"
173
+ CHARACTER_TYPE = "character_type"
174
+ PRINTED = "printed"
175
+ HANDWRITTEN = "handwritten"
173
176
 
174
177
 
175
178
  @object_types_registry.register("TokenClasses")
176
179
  class TokenClasses(ObjectTypes):
177
180
  """Types for token classes"""
178
181
 
179
- header = "header"
180
- question = "question"
181
- answer = "answer"
182
- other = "other"
182
+ HEADER = "header"
183
+ QUESTION = "question"
184
+ ANSWER = "answer"
185
+ OTHER = "other"
183
186
 
184
187
 
185
188
  @object_types_registry.register("BioTag")
186
189
  class BioTag(ObjectTypes):
187
190
  """Types for tags"""
188
191
 
189
- begin = "B"
190
- inside = "I"
191
- outside = "O"
192
- single = "S"
193
- end = "E"
192
+ BEGIN = "B"
193
+ INSIDE = "I"
194
+ OUTSIDE = "O"
195
+ SINGLE = "S"
196
+ END = "E"
194
197
 
195
198
 
196
199
  @object_types_registry.register("TokenClassWithTag")
197
200
  class TokenClassWithTag(ObjectTypes):
198
201
  """Types for token classes with tags, e.g. B-answer"""
199
202
 
200
- b_answer = "B-answer"
201
- b_header = "B-header"
202
- b_question = "B-question"
203
- e_answer = "E-answer"
204
- e_header = "E-header"
205
- e_question = "E-question"
206
- i_answer = "I-answer"
207
- i_header = "I-header"
208
- i_question = "I-question"
209
- s_answer = "S-answer"
210
- s_header = "S-header"
211
- s_question = "S-question"
203
+ B_ANSWER = "B-answer"
204
+ B_HEADER = "B-header"
205
+ B_QUESTION = "B-question"
206
+ E_ANSWER = "E-answer"
207
+ E_HEADER = "E-header"
208
+ E_QUESTION = "E-question"
209
+ I_ANSWER = "I-answer"
210
+ I_HEADER = "I-header"
211
+ I_QUESTION = "I-question"
212
+ S_ANSWER = "S-answer"
213
+ S_HEADER = "S-header"
214
+ S_QUESTION = "S-question"
212
215
 
213
216
 
214
217
  @object_types_registry.register("Relationships")
215
218
  class Relationships(ObjectTypes):
216
219
  """Types for describing relationships between types"""
217
220
 
218
- child = "child"
219
- reading_order = "reading_order"
220
- semantic_entity_link = "semantic_entity_link"
221
+ CHILD = "child"
222
+ READING_ORDER = "reading_order"
223
+ SEMANTIC_ENTITY_LINK = "semantic_entity_link"
221
224
 
222
225
 
223
226
  @object_types_registry.register("Languages")
224
227
  class Languages(ObjectTypes):
225
228
  """Language types"""
226
229
 
227
- english = "eng"
228
- russian = "rus"
229
- german = "deu"
230
- french = "fre"
231
- italian = "ita"
232
- japanese = "jpn"
233
- spanish = "spa"
234
- cebuano = "ceb"
235
- turkish = "tur"
236
- portuguese = "por"
237
- ukrainian = "ukr"
238
- esperanto = "epo"
239
- polish = "pol"
240
- swedish = "swe"
241
- dutch = "dut"
242
- hebrew = "heb"
243
- chinese = "chi"
244
- hungarian = "hun"
245
- arabic = "ara"
246
- catalan = "cat"
247
- finnish = "fin"
248
- czech = "cze"
249
- persian = "per"
250
- serbian = "srp"
251
- greek = "gre"
252
- vietnamese = "vie"
253
- bulgarian = "bul"
254
- korean = "kor"
255
- norwegian = "nor"
256
- macedonian = "mac"
257
- romanian = "rum"
258
- indonesian = "ind"
259
- thai = "tha"
260
- armenian = "arm"
261
- danish = "dan"
262
- tamil = "tam"
263
- hindi = "hin"
264
- croatian = "hrv"
265
- belarusian = "bel"
266
- georgian = "geo"
267
- telugu = "tel"
268
- kazakh = "kaz"
269
- waray = "war"
270
- lithuanian = "lit"
271
- scottish = "glg"
272
- slovak = "slo"
273
- benin = "ben"
274
- basque = "baq"
275
- slovenian = "slv"
276
- malayalam = "mal"
277
- marathi = "mar"
278
- estonian = "est"
279
- azerbaijani = "aze"
280
- albanian = "alb"
281
- latin = "lat"
282
- bosnian = "bos"
283
- norwegian_nynorsk = "nno"
284
- urdu = "urd"
285
- not_defined = "nn"
230
+ ENGLISH = "eng"
231
+ RUSSIAN = "rus"
232
+ GERMAN = "deu"
233
+ FRENCH = "fre"
234
+ ITALIAN = "ita"
235
+ JAPANESE = "jpn"
236
+ SPANISH = "spa"
237
+ CEBUANO = "ceb"
238
+ TURKISH = "tur"
239
+ PORTUGUESE = "por"
240
+ UKRAINIAN = "ukr"
241
+ ESPERANTO = "epo"
242
+ POLISH = "pol"
243
+ SWEDISH = "swe"
244
+ DUTCH = "dut"
245
+ HEBREW = "heb"
246
+ CHINESE = "chi"
247
+ HUNGARIAN = "hun"
248
+ ARABIC = "ara"
249
+ CATALAN = "cat"
250
+ FINNISH = "fin"
251
+ CZECH = "cze"
252
+ PERSIAN = "per"
253
+ SERBIAN = "srp"
254
+ GREEK = "gre"
255
+ VIETNAMESE = "vie"
256
+ BULGARIAN = "bul"
257
+ KOREAN = "kor"
258
+ NORWEGIAN = "nor"
259
+ MACEDONIAN = "mac"
260
+ ROMANIAN = "rum"
261
+ INDONESIAN = "ind"
262
+ THAI = "tha"
263
+ ARMENIAN = "arm"
264
+ DANISH = "dan"
265
+ TAMIL = "tam"
266
+ HINDI = "hin"
267
+ CROATIAN = "hrv"
268
+ BELARUSIAN = "bel"
269
+ GEORGIAN = "geo"
270
+ TELUGU = "tel"
271
+ KAZAKH = "kaz"
272
+ WARAY = "war"
273
+ LITHUANIAN = "lit"
274
+ SCOTTISH = "glg"
275
+ SLOVAK = "slo"
276
+ BENIN = "ben"
277
+ BASQUE = "baq"
278
+ SLOVENIAN = "slv"
279
+ MALAYALAM = "mal"
280
+ MARATHI = "mar"
281
+ ESTONIAN = "est"
282
+ AZERBAIJANI = "aze"
283
+ ALBANIAN = "alb"
284
+ LATIN = "lat"
285
+ BOSNIAN = "bos"
286
+ NORWEGIAN_NYNORSK = "nno"
287
+ URDU = "urd"
288
+ NOT_DEFINED = "nn"
286
289
 
287
290
 
288
291
  @object_types_registry.register("DatasetType")
289
292
  class DatasetType(ObjectTypes):
290
293
  """Dataset types"""
291
294
 
292
- object_detection = "object_detection"
293
- sequence_classification = "sequence_classification"
294
- token_classification = "token_classification"
295
- publaynet = "publaynet"
295
+ OBJECT_DETECTION = "object_detection"
296
+ SEQUENCE_CLASSIFICATION = "sequence_classification"
297
+ TOKEN_CLASSIFICATION = "token_classification"
298
+ PUBLAYNET = "publaynet"
299
+ DEFAULT = "default"
296
300
 
297
301
 
298
302
  _TOKEN_AND_TAG_TO_TOKEN_CLASS_WITH_TAG = {
299
- (TokenClasses.header, BioTag.begin): TokenClassWithTag.b_header,
300
- (TokenClasses.header, BioTag.inside): TokenClassWithTag.i_header,
301
- (TokenClasses.header, BioTag.end): TokenClassWithTag.e_header,
302
- (TokenClasses.header, BioTag.single): TokenClassWithTag.s_header,
303
- (TokenClasses.answer, BioTag.begin): TokenClassWithTag.b_answer,
304
- (TokenClasses.answer, BioTag.inside): TokenClassWithTag.i_answer,
305
- (TokenClasses.answer, BioTag.end): TokenClassWithTag.e_answer,
306
- (TokenClasses.answer, BioTag.single): TokenClassWithTag.s_answer,
307
- (TokenClasses.question, BioTag.begin): TokenClassWithTag.b_question,
308
- (TokenClasses.question, BioTag.inside): TokenClassWithTag.i_question,
309
- (TokenClasses.question, BioTag.end): TokenClassWithTag.e_question,
310
- (TokenClasses.question, BioTag.single): TokenClassWithTag.s_question,
311
- (TokenClasses.other, BioTag.outside): BioTag.outside,
312
- (TokenClasses.header, BioTag.outside): BioTag.outside,
313
- (TokenClasses.answer, BioTag.outside): BioTag.outside,
314
- (TokenClasses.question, BioTag.outside): BioTag.outside,
303
+ (TokenClasses.HEADER, BioTag.BEGIN): TokenClassWithTag.B_HEADER,
304
+ (TokenClasses.HEADER, BioTag.INSIDE): TokenClassWithTag.I_HEADER,
305
+ (TokenClasses.HEADER, BioTag.END): TokenClassWithTag.E_HEADER,
306
+ (TokenClasses.HEADER, BioTag.SINGLE): TokenClassWithTag.S_HEADER,
307
+ (TokenClasses.ANSWER, BioTag.BEGIN): TokenClassWithTag.B_ANSWER,
308
+ (TokenClasses.ANSWER, BioTag.INSIDE): TokenClassWithTag.I_ANSWER,
309
+ (TokenClasses.ANSWER, BioTag.END): TokenClassWithTag.E_ANSWER,
310
+ (TokenClasses.ANSWER, BioTag.SINGLE): TokenClassWithTag.S_ANSWER,
311
+ (TokenClasses.QUESTION, BioTag.BEGIN): TokenClassWithTag.B_QUESTION,
312
+ (TokenClasses.QUESTION, BioTag.INSIDE): TokenClassWithTag.I_QUESTION,
313
+ (TokenClasses.QUESTION, BioTag.END): TokenClassWithTag.E_QUESTION,
314
+ (TokenClasses.QUESTION, BioTag.SINGLE): TokenClassWithTag.S_QUESTION,
315
+ (TokenClasses.OTHER, BioTag.OUTSIDE): BioTag.OUTSIDE,
316
+ (TokenClasses.HEADER, BioTag.OUTSIDE): BioTag.OUTSIDE,
317
+ (TokenClasses.ANSWER, BioTag.OUTSIDE): BioTag.OUTSIDE,
318
+ (TokenClasses.QUESTION, BioTag.OUTSIDE): BioTag.OUTSIDE,
315
319
  }
316
320
 
317
321
 
@@ -333,7 +337,7 @@ def token_class_tag_to_token_class_with_tag(token: ObjectTypes, tag: ObjectTypes
333
337
 
334
338
  def token_class_with_tag_to_token_class_and_tag(
335
339
  token_class_with_tag: ObjectTypes,
336
- ) -> Optional[Tuple[ObjectTypes, ObjectTypes]]:
340
+ ) -> Optional[tuple[ObjectTypes, ObjectTypes]]:
337
341
  """
338
342
  This is the reverse mapping from TokenClassWithTag members to TokenClasses and BioTag
339
343
 
@@ -357,7 +361,7 @@ def update_all_types_dict() -> None:
357
361
  _ALL_TYPES_DICT.update({e.value: e for e in obj})
358
362
 
359
363
 
360
- _OLD_TO_NEW_OBJ_TYPE: Dict[str, str] = {
364
+ _OLD_TO_NEW_OBJ_TYPE: dict[str, str] = {
361
365
  "DOC_CLASS": "document_type",
362
366
  "CHARS": "characters",
363
367
  "BIO_TAG": "tag",
@@ -380,10 +384,10 @@ def _get_new_obj_type_str(obj_type: str) -> str:
380
384
  return _OLD_TO_NEW_OBJ_TYPE.get(obj_type, obj_type)
381
385
 
382
386
 
383
- _BLACK_LIST: List[str] = ["B", "I", "O", "E", "S"]
387
+ _BLACK_LIST: list[str] = ["B", "I", "O", "E", "S"]
384
388
 
385
389
 
386
- def _get_black_list() -> List[str]:
390
+ def _get_black_list() -> list[str]:
387
391
  return _BLACK_LIST
388
392
 
389
393
 
@@ -23,7 +23,7 @@ from typing import Dict, Optional, Union
23
23
 
24
24
  from tqdm import tqdm
25
25
 
26
- from .detection_types import TqdmType
26
+ from .types import TqdmType
27
27
 
28
28
  __all__ = ["get_tqdm", "get_tqdm_default_kwargs"]
29
29
 
@@ -21,6 +21,8 @@ of coordinates. Most of the ideas have been taken from
21
21
  <https://github.com/tensorpack/dataflow/blob/master/dataflow/dataflow/imgaug/transform.py> .
22
22
  """
23
23
 
24
+ from __future__ import annotations
25
+
24
26
  from abc import ABC, abstractmethod
25
27
  from typing import Literal, Optional, Union
26
28
 
@@ -28,7 +30,7 @@ import numpy as np
28
30
  import numpy.typing as npt
29
31
  from numpy import float32
30
32
 
31
- from .detection_types import ImageType
33
+ from .types import PixelValues
32
34
  from .viz import viz_handler
33
35
 
34
36
  __all__ = ["ResizeTransform", "InferenceResize", "PadTransform", "normalize_image"]
@@ -45,7 +47,7 @@ class BaseTransform(ABC):
45
47
  """
46
48
 
47
49
  @abstractmethod
48
- def apply_image(self, img: ImageType) -> ImageType:
50
+ def apply_image(self, img: PixelValues) -> PixelValues:
49
51
  """The transformation that should be applied to the image"""
50
52
  raise NotImplementedError()
51
53
 
@@ -77,7 +79,7 @@ class ResizeTransform(BaseTransform):
77
79
  self.new_w = int(new_w)
78
80
  self.interp = interp
79
81
 
80
- def apply_image(self, img: ImageType) -> ImageType:
82
+ def apply_image(self, img: PixelValues) -> PixelValues:
81
83
  assert img.shape[:2] == (self.h, self.w)
82
84
  ret = viz_handler.resize(img, self.new_w, self.new_h, self.interp)
83
85
  if img.ndim == 3 and ret.ndim == 2:
@@ -85,7 +87,8 @@ class ResizeTransform(BaseTransform):
85
87
  return ret
86
88
 
87
89
  def apply_coords(self, coords: npt.NDArray[float32]) -> npt.NDArray[float32]:
88
- """Transformation that should be applied to coordinates"""
90
+ """Transformation that should be applied to coordinates. Coords are supposed to to be passed as
91
+ np array of points"""
89
92
  coords[:, 0] = coords[:, 0] * (self.new_w * 1.0 / self.w)
90
93
  coords[:, 1] = coords[:, 1] * (self.new_h * 1.0 / self.h)
91
94
  return coords
@@ -106,7 +109,7 @@ class InferenceResize:
106
109
  self.max_size = max_size
107
110
  self.interp = interp
108
111
 
109
- def get_transform(self, img: ImageType) -> ResizeTransform:
112
+ def get_transform(self, img: PixelValues) -> ResizeTransform:
110
113
  """
111
114
  get transform
112
115
  """
@@ -129,7 +132,9 @@ class InferenceResize:
129
132
  return ResizeTransform(h, w, new_h, new_w, self.interp)
130
133
 
131
134
 
132
- def normalize_image(image: ImageType, pixel_mean: npt.NDArray[float32], pixel_std: npt.NDArray[float32]) -> ImageType:
135
+ def normalize_image(
136
+ image: PixelValues, pixel_mean: npt.NDArray[float32], pixel_std: npt.NDArray[float32]
137
+ ) -> PixelValues:
133
138
  """
134
139
  Preprocess pixel values of an image by rescaling.
135
140
 
@@ -140,7 +145,7 @@ def normalize_image(image: ImageType, pixel_mean: npt.NDArray[float32], pixel_st
140
145
  return (image - pixel_mean) * (1.0 / pixel_std)
141
146
 
142
147
 
143
- def pad_image(image: ImageType, top: int, right: int, bottom: int, left: int) -> ImageType:
148
+ def pad_image(image: PixelValues, top: int, right: int, bottom: int, left: int) -> PixelValues:
144
149
  """Pad an image with white color and with given top/bottom/right/left pixel values. Only white padding is
145
150
  currently supported
146
151
 
@@ -181,7 +186,7 @@ class PadTransform(BaseTransform):
181
186
  self.image_height: Optional[int] = None
182
187
  self.mode = mode
183
188
 
184
- def apply_image(self, img: ImageType) -> ImageType:
189
+ def apply_image(self, img: PixelValues) -> PixelValues:
185
190
  """Apply padding to image"""
186
191
  self.image_width = img.shape[1]
187
192
  self.image_height = img.shape[0]
@@ -214,6 +219,6 @@ class PadTransform(BaseTransform):
214
219
  coords[:, 1] = np.maximum(coords[:, 1] - self.top, np.zeros(coords[:, 1].shape))
215
220
  return coords
216
221
 
217
- def clone(self) -> "PadTransform":
222
+ def clone(self) -> PadTransform:
218
223
  """clone"""
219
224
  return self.__class__(self.top, self.right, self.bottom, self.left, self.mode)