deepdoctection-0.39.2-py3-none-any.whl → deepdoctection-0.39.4-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of deepdoctection might be problematic. Click here for more details.

@@ -25,7 +25,7 @@ from .utils.logger import LoggingRecord, logger
25
25
 
26
26
  # pylint: enable=wrong-import-position
27
27
 
28
- __version__ = "0.39.2"
28
+ __version__ = "0.39.4"
29
29
 
30
30
  _IMPORT_STRUCTURE = {
31
31
  "analyzer": ["config_sanity_checks", "get_dd_analyzer", "ServiceFactory"],
@@ -702,11 +702,11 @@ class Image:
702
702
  return get_uuid(self.image_id, *container_ids)
703
703
 
704
704
  def save(
705
- self,
706
- image_to_json: bool = True,
707
- highest_hierarchy_only: bool = False,
708
- path: Optional[PathLikeOrStr] = None,
709
- dry: bool = False,
705
+ self,
706
+ image_to_json: bool = True,
707
+ highest_hierarchy_only: bool = False,
708
+ path: Optional[PathLikeOrStr] = None,
709
+ dry: bool = False,
710
710
  ) -> Optional[Union[ImageDict, str]]:
711
711
  """
712
712
  Export image as dictionary. As numpy array cannot be serialized `image` values will be converted into
@@ -719,6 +719,18 @@ class Image:
719
719
 
720
720
  :return: optional dict
721
721
  """
722
+
723
+ def set_image_keys_to_none(d): # type: ignore
724
+ if isinstance(d, dict):
725
+ for key, value in d.items():
726
+ if key == '_image':
727
+ d[key] = None
728
+ else:
729
+ set_image_keys_to_none(value)
730
+ elif isinstance(d, list):
731
+ for item in d:
732
+ set_image_keys_to_none(item)
733
+
722
734
  if path is None:
723
735
  path = Path(self.location)
724
736
  path = Path(path)
@@ -734,7 +746,7 @@ class Image:
734
746
  export_dict = self.as_dict()
735
747
  export_dict["location"] = fspath(export_dict["location"])
736
748
  if not image_to_json:
737
- export_dict["_image"] = None
749
+ set_image_keys_to_none(export_dict)
738
750
  if dry:
739
751
  return export_dict
740
752
  with open(path_json, "w", encoding="UTF-8") as file:
@@ -18,6 +18,7 @@
18
18
  """
19
19
  Module for DatasetRegistry
20
20
  """
21
+ import inspect
21
22
 
22
23
  import catalogue # type: ignore
23
24
  from tabulate import tabulate
@@ -47,7 +48,10 @@ def get_dataset(name: str) -> DatasetBase:
47
48
  :param name: A dataset name
48
49
  :return: An instance of a dataset
49
50
  """
50
- return dataset_registry.get(name)()
51
+ ds = dataset_registry.get(name)
52
+ if inspect.isclass(ds):
53
+ return ds()
54
+ return ds
51
55
 
52
56
 
53
57
  def print_dataset_infos(add_license: bool = True, add_info: bool = True) -> None:
@@ -79,8 +79,8 @@ def _load_model(
79
79
  state_dict = torch.load(os.fspath(path_weights), map_location=device)
80
80
  for key in list(state_dict.keys()):
81
81
  state_dict["model." + key] = state_dict.pop(key)
82
- doctr_predictor.load_state_dict(state_dict)
83
- doctr_predictor.to(device)
82
+ doctr_predictor.load_state_dict(state_dict) # type: ignore
83
+ doctr_predictor.to(device) # type: ignore
84
84
  elif lib == "TF":
85
85
  # Unzip the archive
86
86
  params_path = Path(path_weights).parent
@@ -88,9 +88,9 @@ def _load_model(
88
88
  if is_zip_path:
89
89
  with ZipFile(path_weights, "r") as file:
90
90
  file.extractall(path=params_path)
91
- doctr_predictor.model.load_weights(params_path / "weights")
91
+ doctr_predictor.model.load_weights(params_path / "weights") # type: ignore
92
92
  else:
93
- doctr_predictor.model.load_weights(os.fspath(path_weights))
93
+ doctr_predictor.model.load_weights(os.fspath(path_weights)) # type: ignore
94
94
 
95
95
 
96
96
  def auto_select_lib_for_doctr() -> Literal["PT", "TF"]:
@@ -125,7 +125,7 @@ def doctr_predict_text_lines(
125
125
  DetectionResult(
126
126
  box=box[:4].tolist(), class_id=1, score=box[4], absolute_coords=False, class_name=LayoutType.WORD
127
127
  )
128
- for box in raw_output[0]["words"]
128
+ for box in raw_output[0]["words"] # type: ignore
129
129
  ]
130
130
  return detection_results
131
131
 
@@ -480,7 +480,7 @@ class DoctrTextRecognizer(TextRecognizer):
480
480
  return auto_select_lib_for_doctr()
481
481
 
482
482
  def clear_model(self) -> None:
483
- self.doctr_predictor = None
483
+ self.doctr_predictor = None # type: ignore
484
484
 
485
485
 
486
486
  class DocTrRotationTransformer(ImageTransformer):
@@ -527,7 +527,8 @@ class DocTrRotationTransformer(ImageTransformer):
527
527
  return viz_handler.rotate_image(np_img, specification.angle) # type: ignore
528
528
 
529
529
  def predict(self, np_img: PixelValues) -> DetectionResult:
530
- angle = estimate_orientation(np_img, self.number_contours, self.ratio_threshold_for_lines)
530
+ angle = estimate_orientation(np_img, n_ct=self.number_contours,
531
+ ratio_threshold_for_lines=self.ratio_threshold_for_lines)
531
532
  if angle < 0:
532
533
  angle += 360
533
534
  return DetectionResult(angle=round(angle, 2))
@@ -32,7 +32,7 @@ from .maputils import LabelSummarizer, curry
32
32
  @curry
33
33
  def cat_to_sub_cat(
34
34
  dp: Image,
35
- categories_dict_names_as_key: dict[TypeOrStr, int],
35
+ categories_dict_names_as_key: Optional[dict[TypeOrStr, int]] = None,
36
36
  cat_to_sub_cat_dict: Optional[dict[TypeOrStr, TypeOrStr]] = None,
37
37
  ) -> Image:
38
38
  """
@@ -45,7 +45,8 @@ def cat_to_sub_cat(
45
45
  :param cat_to_sub_cat_dict: e.g. {'foo': 'sub_cat_1', 'bak': 'sub_cat_2'}
46
46
  :return: Image with updated Annotations
47
47
  """
48
-
48
+ if categories_dict_names_as_key is None:
49
+ categories_dict_names_as_key = {}
49
50
  if cat_to_sub_cat_dict is None:
50
51
  return dp
51
52
  cat_to_sub_cat_dict_obj_type = {get_type(key): get_type(value) for key, value in cat_to_sub_cat_dict.items()}
@@ -54,7 +55,7 @@ def cat_to_sub_cat(
54
55
  sub_cat = ann.get_sub_category(sub_cat_type)
55
56
  if sub_cat:
56
57
  ann.category_name = sub_cat.category_name
57
- ann.category_id = categories_dict_names_as_key[ann.category_name]
58
+ ann.category_id = categories_dict_names_as_key.get(ann.category_name,DEFAULT_CATEGORY_ID)
58
59
 
59
60
  return dp
60
61
 
@@ -1190,14 +1190,16 @@ class PubtablesSegmentationService(PipelineComponent):
1190
1190
  for key, value in cell_rn_cn_to_ann_id.items():
1191
1191
  if key[idx] == item_number:
1192
1192
  cell_ann = dp.get_annotation(annotation_ids=value)[0]
1193
- self.dp_manager.set_category_annotation(
1194
- item_header_cell_name, None, item_header_cell_name, cell_ann.annotation_id
1195
- )
1193
+ if item_header_cell_name not in cell_ann.sub_categories:
1194
+ self.dp_manager.set_category_annotation(
1195
+ item_header_cell_name, None, item_header_cell_name, cell_ann.annotation_id
1196
+ )
1196
1197
  else:
1197
1198
  cell_ann = dp.get_annotation(annotation_ids=value)[0]
1198
- self.dp_manager.set_category_annotation(
1199
- item_header_cell_name, None, CellType.BODY, cell_ann.annotation_id
1200
- )
1199
+ if CellType.BODY not in cell_ann.sub_categories:
1200
+ self.dp_manager.set_category_annotation(
1201
+ item_header_cell_name, None, CellType.BODY, cell_ann.annotation_id
1202
+ )
1201
1203
 
1202
1204
  # TODO: the summaries should be sub categories of the underlying ann
1203
1205
  self.dp_manager.set_summary_annotation(
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: deepdoctection
3
- Version: 0.39.2
3
+ Version: 0.39.4
4
4
  Summary: Repository for Document AI
5
5
  Home-page: https://github.com/deepdoctection/deepdoctection
6
6
  Author: Dr. Janis Meyer
@@ -57,7 +57,7 @@ Requires-Dist: tensorpack==0.11; extra == "tf"
57
57
  Requires-Dist: protobuf==3.20.1; extra == "tf"
58
58
  Requires-Dist: tensorflow-addons>=0.17.1; extra == "tf"
59
59
  Requires-Dist: tf2onnx>=1.9.2; extra == "tf"
60
- Requires-Dist: python-doctr==0.8.1; extra == "tf"
60
+ Requires-Dist: python-doctr==0.9.0; extra == "tf"
61
61
  Requires-Dist: pycocotools>=2.0.2; extra == "tf"
62
62
  Requires-Dist: boto3==1.34.102; extra == "tf"
63
63
  Requires-Dist: pdfplumber>=0.11.0; extra == "tf"
@@ -88,7 +88,7 @@ Requires-Dist: tqdm==4.64.0; extra == "pt"
88
88
  Requires-Dist: timm>=0.9.16; extra == "pt"
89
89
  Requires-Dist: transformers>=4.48.0; extra == "pt"
90
90
  Requires-Dist: accelerate>=0.29.1; extra == "pt"
91
- Requires-Dist: python-doctr==0.8.1; extra == "pt"
91
+ Requires-Dist: python-doctr==0.9.0; extra == "pt"
92
92
  Requires-Dist: boto3==1.34.102; extra == "pt"
93
93
  Requires-Dist: pdfplumber>=0.11.0; extra == "pt"
94
94
  Requires-Dist: fasttext-wheel; extra == "pt"
@@ -1,4 +1,4 @@
1
- deepdoctection/__init__.py,sha256=KpFnC7nCOVQOgeXDWt0fIjKoikD3MRj-_iSAbARIWeQ,12754
1
+ deepdoctection/__init__.py,sha256=4u3rGxdbvfFr1l6xzH6qO5MapLGspBv5XPhBGMeji_4,12754
2
2
  deepdoctection/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
3
3
  deepdoctection/analyzer/__init__.py,sha256=icClxrd20XutD6LxLgEPIWceSs4j_QfI3szCE-9BL2w,729
4
4
  deepdoctection/analyzer/_config.py,sha256=1rfvVrp7cI2YLzpahD77aa1tZ_KFAIQ21DM1NWhxYiI,5058
@@ -19,14 +19,14 @@ deepdoctection/datapoint/__init__.py,sha256=3K406GbOPhoEp8koVaSbMocmSsmWifnSZ1SP
19
19
  deepdoctection/datapoint/annotation.py,sha256=FEgz4COxVDfjic0gG7kS6iHnWLBIgFnquQ63Cbj2a4Y,22531
20
20
  deepdoctection/datapoint/box.py,sha256=UAdSnLexvFyg4KK1u9kXdJxhaWTwRxTU-cnQcvl37Q8,23410
21
21
  deepdoctection/datapoint/convert.py,sha256=gJbHY2V8nlMht1N5VdxTmWSsOeydpFPTJsaJHp6XGgE,7516
22
- deepdoctection/datapoint/image.py,sha256=S6yfsIRQgMCl6HYAcHYJSBcbfdYKKtebtkEkkkrXsMQ,33619
22
+ deepdoctection/datapoint/image.py,sha256=dImZfJr72jS8yanZx1n70p4lIv0Qa21-qlUcj0DZcls,34060
23
23
  deepdoctection/datapoint/view.py,sha256=XPyhbBr2cGIKdAiISBVZWxNxlSvN8kmGsD9P0mfpEEE,50772
24
24
  deepdoctection/datasets/__init__.py,sha256=-A3aR90aDsHPmVM35JavfnQ2itYSCn3ujl4krRni1QU,1076
25
25
  deepdoctection/datasets/adapter.py,sha256=Ly_vbOAgVI73V41FUccnSX1ECTOyesW_qsuvQuvOZbw,7796
26
26
  deepdoctection/datasets/base.py,sha256=DT4i-d74sIEiUNC6UspIHNJuHSK0t1dBv7qwadg4rLw,22341
27
27
  deepdoctection/datasets/dataflow_builder.py,sha256=cYU2zV3gZW2bFvMHimlO9VIl3BAUaCwML08cCIQ8Em4,4107
28
28
  deepdoctection/datasets/info.py,sha256=6y5TfiUhQppynbMFP5JmUPk95ggsVCtGIw4dYh2lVus,20501
29
- deepdoctection/datasets/registry.py,sha256=ZjzVzjsCgNXJuZZZtR98_yKocADmh4EBGV5JqJbGjWk,2543
29
+ deepdoctection/datasets/registry.py,sha256=tvzMUk34ZD3AsedS1DFfYATYLxm-7bn2-8J1AJiXGKM,2616
30
30
  deepdoctection/datasets/save.py,sha256=Y9508Qqp8gIGN7pbGgVBBnkiC6NdCb9L2YR4wVvEUxM,3350
31
31
  deepdoctection/datasets/instances/__init__.py,sha256=XEc_4vT5lDn6bbZID9ujDEumWu8Ec2W-QS4pI_bfWWE,1388
32
32
  deepdoctection/datasets/instances/doclaynet.py,sha256=wRZT7wMTilZBLZ1gKY2cWReD1EGT735vOOTy0pD0N6M,12038
@@ -53,7 +53,7 @@ deepdoctection/extern/__init__.py,sha256=9Iks9b4Q_LynjcV167TVCoK8YsQRUcA2jjmAmDN
53
53
  deepdoctection/extern/base.py,sha256=ONPgappl_P5HSwQr42FatuRnwMTvUPecPsCztDTN0Hw,24108
54
54
  deepdoctection/extern/d2detect.py,sha256=zrKv1yurApnjD7QZIZk_8LYCahjmN82MQUjHjv8zvkQ,22127
55
55
  deepdoctection/extern/deskew.py,sha256=sPoixu8S9he-0wbs-jgxtPE2V9BiP4-3uZlb6F5Y1SA,3077
56
- deepdoctection/extern/doctrocr.py,sha256=T3_tvlih22_dVCBZypS1Y8tjQQB1fkAxIbGdUGHIapQ,24473
56
+ deepdoctection/extern/doctrocr.py,sha256=WrFA0N54fr9C_ahGzZJb4H-fNzz5wXQFveFiERYAm74,24637
57
57
  deepdoctection/extern/fastlang.py,sha256=F4gK-SEwcCujjxH327ZDzMGWToJ49xS_dCKcePQ9IlY,4780
58
58
  deepdoctection/extern/hfdetr.py,sha256=JzHrrTyzS9qh6T2TsvKboAGZkIhno2txmSoLQ5Vd-lo,12077
59
59
  deepdoctection/extern/hflayoutlm.py,sha256=tFaf90FRbZzhSycdp8rGkeiPywQa6UcTEEwbayIXkr0,57023
@@ -88,7 +88,7 @@ deepdoctection/extern/tp/tpfrcnn/utils/__init__.py,sha256=kiPlXxHlTGN9eI7YE9Bgwt
88
88
  deepdoctection/extern/tp/tpfrcnn/utils/box_ops.py,sha256=aBLqPg_ApaiimtBRaOsLKTZZFIBh87vVtqjLPMaX9fQ,2379
89
89
  deepdoctection/extern/tp/tpfrcnn/utils/np_box_ops.py,sha256=O-q1GQiOEd1lN1MQDsJvHwD2OmBO-qHNeqJ1Qnec93g,3539
90
90
  deepdoctection/mapper/__init__.py,sha256=Xqb34aCjslZDQnqQgCSvnloL5DbdT9eHhn-StpVPbzE,1130
91
- deepdoctection/mapper/cats.py,sha256=s73JzONV2UQ71szfljurk7H1-UjDBWsW4oNLs5xePUk,16474
91
+ deepdoctection/mapper/cats.py,sha256=Go9k9wiSid1aSPSteTCE0AgQ1tZmOA8pfOPkhKSQhhg,16601
92
92
  deepdoctection/mapper/cocostruct.py,sha256=GcbUpPFUg67pcOHQluWBFOFcGaYnlZcTmwBDERBVgCA,5978
93
93
  deepdoctection/mapper/d2struct.py,sha256=Dx-YnycsIQH4a5-9Gn_yMhiQ-gOFgMueNeH3rhXjuCU,8555
94
94
  deepdoctection/mapper/hfstruct.py,sha256=2PjGKsYturVJBimLT1CahYh09KSRAFEHz_QNtC162kQ,5551
@@ -113,7 +113,7 @@ deepdoctection/pipe/lm.py,sha256=x9NoYpivdjQF1r76a7PPrUuBEmuHP7ZukuXFDkXhXBc,175
113
113
  deepdoctection/pipe/order.py,sha256=PnJZiCnxFluJiECXLTZT0c1Rr66vIRBFraa_G41UA2k,40121
114
114
  deepdoctection/pipe/refine.py,sha256=dTfI396xydPdbzpfo4yqFcuxl3UAB1y-WbSQn1o76ec,22367
115
115
  deepdoctection/pipe/registry.py,sha256=aFx-Tn0xhVA5l5H18duNW5QoTNKQltybsEUEzsMgUfg,902
116
- deepdoctection/pipe/segment.py,sha256=CR83HQMW0hrRG8W6pFuB0YibxQMWpqI7_LaUIcJcQwo,59116
116
+ deepdoctection/pipe/segment.py,sha256=mWYRg7UR80PtIj1SIg_hiujDcCtLlvKJUP9vx4ZpW0Y,59318
117
117
  deepdoctection/pipe/sub_layout.py,sha256=N1RcID-boORcwsW_j0l64HpUu3rff0ge5qEanudLYgk,13838
118
118
  deepdoctection/pipe/text.py,sha256=h9q6d3HFOs7LOg-iwdLUPiQxrPqgunBVNmtYMBrfRQE,11180
119
119
  deepdoctection/pipe/transform.py,sha256=9Om7X7hJeL4jgUwHM1CHa4sb5v7Qo1PtVG0ls_3nI7w,3798
@@ -141,8 +141,8 @@ deepdoctection/utils/transform.py,sha256=3kCgsEeRkG1efCdkfvj7tUFMs-e2jbjbflq826F
141
141
  deepdoctection/utils/types.py,sha256=_3dmPdCIZNLbgU5QP5k_c5phDf18xLe1kYL6t2nM45s,2953
142
142
  deepdoctection/utils/utils.py,sha256=csVs_VvCq4QBETPoE2JdTTL4MFYnD4xh-Js5vRb612g,6492
143
143
  deepdoctection/utils/viz.py,sha256=Jf8ePNYWlpuyaS6SeTYQ4OyA3eNhtgjvAQZnGNdgHC0,27051
144
- deepdoctection-0.39.2.dist-info/LICENSE,sha256=GQ0rUvuGdrMNEI3iHK5UQx6dIMU1QwAuyXsxUHn5MEQ,11351
145
- deepdoctection-0.39.2.dist-info/METADATA,sha256=Z6b0DeMKoXZOR6O7Yxu7Qt5_MLgCxxvyNuWsPAhFo0M,19741
146
- deepdoctection-0.39.2.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
147
- deepdoctection-0.39.2.dist-info/top_level.txt,sha256=hs2DdoOL9h4mnHhmO82BT4pz4QATIoOZ20PZmlnxFI8,15
148
- deepdoctection-0.39.2.dist-info/RECORD,,
144
+ deepdoctection-0.39.4.dist-info/LICENSE,sha256=GQ0rUvuGdrMNEI3iHK5UQx6dIMU1QwAuyXsxUHn5MEQ,11351
145
+ deepdoctection-0.39.4.dist-info/METADATA,sha256=9DvyyvO9Ofd9YFuqQ2SVO2Bi69WayBC2VZyrpMCimZQ,19741
146
+ deepdoctection-0.39.4.dist-info/WHEEL,sha256=52BFRY2Up02UkjOa29eZOS2VxUrpPORXg1pkohGGUS8,91
147
+ deepdoctection-0.39.4.dist-info/top_level.txt,sha256=hs2DdoOL9h4mnHhmO82BT4pz4QATIoOZ20PZmlnxFI8,15
148
+ deepdoctection-0.39.4.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: setuptools (75.8.0)
2
+ Generator: setuptools (76.0.0)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5