deepdoctection 0.43.3__py3-none-any.whl → 0.43.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of deepdoctection might be problematic. Click here for more details.

@@ -25,7 +25,7 @@ from .utils.logger import LoggingRecord, logger
25
25
 
26
26
  # pylint: enable=wrong-import-position
27
27
 
28
- __version__ = "0.43.3"
28
+ __version__ = "0.43.4"
29
29
 
30
30
  _IMPORT_STRUCTURE = {
31
31
  "analyzer": ["config_sanity_checks", "get_dd_analyzer", "ServiceFactory"],
@@ -903,6 +903,7 @@ cfg.LAYOUT_LINK.CHILD_CATEGORIES = [LayoutType.CAPTION]
903
903
  # This prevents accidental modification at runtime.
904
904
  cfg.freeze()
905
905
 
906
+
906
907
  def update_cfg_from_defaults() -> None:
907
908
  """
908
909
  Update the configuration with current values from IMAGE_DEFAULTS.
@@ -72,6 +72,18 @@ class ImageAnnotationBaseView(ImageAnnotation):
72
72
 
73
73
  base_page: Page
74
74
 
75
+ @property
76
+ def b64_image(self) -> Optional[str]:
77
+ """
78
+ Returns:
79
+ The base64 encoded image of the page if available, otherwise None.
80
+ """
81
+
82
+ if self.image is not None:
83
+ if self.image.image is not None:
84
+ return viz_handler.convert_np_to_b64(self.image.image)
85
+ return None
86
+
75
87
  @property
76
88
  def bbox(self) -> list[float]:
77
89
  """
@@ -157,7 +169,7 @@ class ImageAnnotationBaseView(ImageAnnotation):
157
169
  """
158
170
 
159
171
  # sub categories and summary sub categories are valid attribute names
160
- attr_names = {"bbox", "np_image"}.union({cat.value for cat in self.sub_categories})
172
+ attr_names = {"bbox", "np_image", "b64_image"}.union({cat.value for cat in self.sub_categories})
161
173
  if self.image:
162
174
  attr_names = attr_names.union({cat.value for cat in self.image.summary.sub_categories.keys()})
163
175
  return {attr_name.value if isinstance(attr_name, ObjectTypes) else attr_name for attr_name in attr_names}
@@ -774,6 +786,7 @@ class Page(Image):
774
786
  "residual_layouts",
775
787
  "document_summary",
776
788
  "document_mapping",
789
+ "b64_image",
777
790
  }
778
791
  include_residual_text_container: bool = True
779
792
 
@@ -902,6 +915,17 @@ class Page(Image):
902
915
  """
903
916
  return self.get_annotation(category_names=self.residual_text_block_categories)
904
917
 
918
+ @property
919
+ def b64_image(self) -> Optional[str]:
920
+ """
921
+ Returns:
922
+ The base64 encoded image of the page if available, otherwise None.
923
+ """
924
+
925
+ if self.image_orig.image is not None:
926
+ return viz_handler.convert_np_to_b64(self.image_orig.image)
927
+ return None
928
+
905
929
  @classmethod
906
930
  def from_image(
907
931
  cls,
@@ -1410,10 +1434,12 @@ class Page(Image):
1410
1434
  for block in block_with_order:
1411
1435
  all_words.extend(block.get_ordered_words()) # type: ignore
1412
1436
  return [
1413
- {"word": word.characters,
1414
- "entity": word.token_tag.value,
1415
- "annotation_id": word.annotation_id,
1416
- "successor_annotation_id": word.successor[0].annotation_id if word.successor else None}
1437
+ {
1438
+ "word": word.characters,
1439
+ "entity": word.token_tag.value,
1440
+ "annotation_id": word.annotation_id,
1441
+ "successor_annotation_id": word.successor[0].annotation_id if word.successor else None,
1442
+ }
1417
1443
  for word in all_words
1418
1444
  if word.token_tag not in (TokenClasses.OTHER, None)
1419
1445
  ]
@@ -269,10 +269,7 @@ class DoctrTextlineDetector(DoctrTextlineDetectorMixin):
269
269
  if self.lib == "PT":
270
270
  self.device = get_torch_device(device)
271
271
 
272
- self.doctr_predictor = self.get_wrapped_model(self.architecture,
273
- self.path_weights,
274
- self.device,
275
- self.lib)
272
+ self.doctr_predictor = self.get_wrapped_model(self.architecture, self.path_weights, self.device, self.lib)
276
273
 
277
274
  def predict(self, np_img: PixelValues) -> list[DetectionResult]:
278
275
  """
@@ -19,7 +19,7 @@
19
19
  Datapoint manager
20
20
  """
21
21
  from dataclasses import asdict
22
- from typing import Optional, Union, Sequence
22
+ from typing import Optional, Sequence, Union
23
23
 
24
24
  import numpy as np
25
25
 
@@ -801,14 +801,13 @@ class TextOrderService(TextLineServiceMixin):
801
801
  if self.include_residual_text_container:
802
802
  add_category.append(LayoutType.LINE)
803
803
 
804
- if set(self.floating_text_block_categories) <= set(
805
- self.text_block_categories + tuple(add_category)
806
- ):
807
- logger.warning("In most cases floating_text_block_categories must be a subset of text_block_categories. "
808
- "Adding categories to floating_text_block_categories, that do not belong to "
809
- "text_block_categories makes only sense for categories set have CHILD relationships with"
810
- " annotations that belong to text_block_categories.")
811
-
804
+ if set(self.floating_text_block_categories) <= set(self.text_block_categories + tuple(add_category)):
805
+ logger.warning(
806
+ "In most cases floating_text_block_categories must be a subset of text_block_categories. "
807
+ "Adding categories to floating_text_block_categories, that do not belong to "
808
+ "text_block_categories makes only sense for categories set have CHILD relationships with"
809
+ " annotations that belong to text_block_categories."
810
+ )
812
811
 
813
812
  def get_meta_annotation(self) -> MetaAnnotation:
814
813
  add_category = [self.text_container]
@@ -71,7 +71,6 @@ def log_deprecated(name: str, text: str, eos: str = "", max_num_warnings: Option
71
71
  logger.info(LoggingRecord(f"[Deprecated] {info_msg}"))
72
72
 
73
73
 
74
-
75
74
  def deprecated(
76
75
  text: str = "", eos: str = "", max_num_warnings: Optional[int] = None
77
76
  ) -> Callable[[Callable[..., T]], Callable[..., T]]:
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: deepdoctection
3
- Version: 0.43.3
3
+ Version: 0.43.4
4
4
  Summary: Repository for Document AI
5
5
  Home-page: https://github.com/deepdoctection/deepdoctection
6
6
  Author: Dr. Janis Meyer
@@ -1,7 +1,7 @@
1
- deepdoctection/__init__.py,sha256=nJ6cTH9fWxNIGSDjMS7AoU9hbJwfbb-SvTwOw6eiPAg,12964
1
+ deepdoctection/__init__.py,sha256=UftLKUS4Z03F_LCcON51Gx0XEKfCLp7VAw9MKysFLxQ,12964
2
2
  deepdoctection/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
3
3
  deepdoctection/analyzer/__init__.py,sha256=wg0BcFwdCeREwzZfa--Yx8HUJ9LPv5z5PmLwtkZdPH8,772
4
- deepdoctection/analyzer/config.py,sha256=Uwi9MYOym0QXzFcip7bRXoy732P4tZVMFBUnUZk9c1w,41761
4
+ deepdoctection/analyzer/config.py,sha256=DToaXs59w7SpEi2vkeBEyrBwyyGiXUST_N99wL9nHoI,41762
5
5
  deepdoctection/analyzer/dd.py,sha256=2BGvZpl9o9khcaOV52-DPHMrs0DsqUO8cpdqFVHHzDQ,5176
6
6
  deepdoctection/analyzer/factory.py,sha256=DI0S38KAG2sIROrSximsWJsMbem91a9zXaeWsDNvkGg,37574
7
7
  deepdoctection/configs/__init__.py,sha256=TX_P6tqDOF1LK1mi9ruAl7x0mtv1Asm8cYWCz3Pe2dk,646
@@ -21,7 +21,7 @@ deepdoctection/datapoint/annotation.py,sha256=f32BNmzUGJoNMeGst2RGC2jmjJpzzjxyBR
21
21
  deepdoctection/datapoint/box.py,sha256=QAS8sK2Ge4_ysW6zOYkLlzNwhSyw_mhYcYsxscClEno,31453
22
22
  deepdoctection/datapoint/convert.py,sha256=6ENXX3tBdY8ogb2NBPxsOsQMGnQux8ol5nrUfWS5tYE,7352
23
23
  deepdoctection/datapoint/image.py,sha256=kqwCz8DSc19hQpkl_4L1_Ek7_2KrH5KsV9e0S-R4n5w,35147
24
- deepdoctection/datapoint/view.py,sha256=lVUTi2vfh6w7k92kcwQNpKmfOGgZmM12bYJUFrO5Pno,57163
24
+ deepdoctection/datapoint/view.py,sha256=YtoqafStrHqbfyD628-W1HOA2Gb0kUI2oaEiteBHjbA,57902
25
25
  deepdoctection/datasets/__init__.py,sha256=4ifjIwWCPYiS31GzUlVDScrkNOrb1eo5xHlRXNyg_58,994
26
26
  deepdoctection/datasets/adapter.py,sha256=VSLM_980aHi4TpgOxfxiBHiF_fUXyh348PXet6zTo-4,7779
27
27
  deepdoctection/datasets/base.py,sha256=HTIquJir2BZRTLl1HSQM0ICfvjIaWAjJeyz3BEHgdb0,23175
@@ -54,7 +54,7 @@ deepdoctection/extern/__init__.py,sha256=jG2qe5_X7fJFnJlx04Lf1KUTXKKKYtCkKR7WQ7l
54
54
  deepdoctection/extern/base.py,sha256=LomTR9HXcBU55MPDIA8D1rIamk7DUmToJmgcRXzCoeU,31650
55
55
  deepdoctection/extern/d2detect.py,sha256=I0oEkprr5iVpKpM3a3nknAU-sXwNoDQdp_B1gzzODsk,22374
56
56
  deepdoctection/extern/deskew.py,sha256=L_jU0rXh03qzwaT79EIqE_zYMUVeFwWDbsGbtahuL2k,3124
57
- deepdoctection/extern/doctrocr.py,sha256=vOI67NCeEznHzdY4zu1tggwg3_PfwV1eigsr2Z8EPzc,25543
57
+ deepdoctection/extern/doctrocr.py,sha256=jB0mnvGmmygoUu9e9zw2_HtAgQUdCJHbxMSt1cfK5bA,25381
58
58
  deepdoctection/extern/fastlang.py,sha256=4D9A-_hTXUcvXG6IJJknX34LrD71v08XtNdWgvXD7fE,4736
59
59
  deepdoctection/extern/hfdetr.py,sha256=N3eLNI5BsQS9_7YZyBeWndSgUydJij7ugZA9p4V1xaQ,14316
60
60
  deepdoctection/extern/hflayoutlm.py,sha256=3mZZ3byn00jSrLWO2vZFas9j4VrhbYQNmF1mwPG2ElQ,59642
@@ -103,7 +103,7 @@ deepdoctection/mapper/pubstruct.py,sha256=UTyfUmzMSuf2BXtdYwHjK7ngsIwAxSZjwTxDtz
103
103
  deepdoctection/mapper/tpstruct.py,sha256=dxtEVHYVnkH-zjjbHzkFrPgS9eheys6E-CMlsjaOnxo,5468
104
104
  deepdoctection/mapper/xfundstruct.py,sha256=JVs92T5l3HedQvdc6ZMACj2mq5Co4WCS9B-CnQ01opA,9092
105
105
  deepdoctection/pipe/__init__.py,sha256=E3cYAVWOvMzIN7jbKFyqLjFXahcFGcAGkb1uChM_XCY,1034
106
- deepdoctection/pipe/anngen.py,sha256=I7VFoTTa-sJCBwFJNaZeLD7pcrLjRPTX2I6-ZDCDHPI,16819
106
+ deepdoctection/pipe/anngen.py,sha256=0zrP6DoN83ct1pKxGFpwHonFpP2GZV62lGsneymgOJo,16819
107
107
  deepdoctection/pipe/base.py,sha256=oszB_DepcFtORvDdGTZZPWMhk01C68RUWXHjeX7SF3M,18163
108
108
  deepdoctection/pipe/common.py,sha256=OcsqHr_c66Yqt98hFeKwaa0mciWMCauw0HZ3YnHx8MU,24586
109
109
  deepdoctection/pipe/concurrency.py,sha256=_EKZi4eCeF3mVHytZL_fMwyqa25C2aR9g8vrIFB8iR4,9780
@@ -111,7 +111,7 @@ deepdoctection/pipe/doctectionpipe.py,sha256=ik5F92F3klI5Nve_AnyIRj-ApMoKHSR2Sjc
111
111
  deepdoctection/pipe/language.py,sha256=T5g5_2GIsbTltAmn_PFymMUMoik8_b0uJNx8f5dT9MM,5898
112
112
  deepdoctection/pipe/layout.py,sha256=oAldMtwyZee1IqpuflKKvmeL2Z_nXFiqwFMS4VYv5eI,6391
113
113
  deepdoctection/pipe/lm.py,sha256=nYI2bm0sc9d3JMlIPyNyd4XxXFRBIHRUYfMImuek6b4,19793
114
- deepdoctection/pipe/order.py,sha256=8CqEWUA6U9HxThKKGP9yJMbRaML2by7do0Gdhl_7AdI,40964
114
+ deepdoctection/pipe/order.py,sha256=apuJjZe2VhWVoYRPN-kLv9y6SULKhjPnGqFYRNqw8kQ,40938
115
115
  deepdoctection/pipe/refine.py,sha256=SrMcAWXRO5tJpqaZCEz9RzvjPyiQiE8fZ9TXBcaBKck,23310
116
116
  deepdoctection/pipe/registry.py,sha256=uT5fnHjffoNGk2JPuD2-pMYtO3Iko7-wrwVZVCWLtok,906
117
117
  deepdoctection/pipe/segment.py,sha256=rHhEWr5zZ1ppj-gMa-q-UCr1AYTWpUW7oA1umwebqBI,61302
@@ -126,7 +126,7 @@ deepdoctection/train/tp_frcnn_train.py,sha256=Tltb-v2JD5oPuHCZGA9B5DM4ZaidoBITlH
126
126
  deepdoctection/utils/__init__.py,sha256=brBceRWeov9WXMiJTjyJOF2rHMP8trGGRRjhMdZ61nI,2371
127
127
  deepdoctection/utils/concurrency.py,sha256=9ly81D5i2ZFzKfXMIUSmhT42eMs4QttsRhWXdkIk6Aw,5832
128
128
  deepdoctection/utils/context.py,sha256=5QfdzxsiSPnNs1qtJdgjguIoD8srLQ2W8oeDzwp9F78,4522
129
- deepdoctection/utils/develop.py,sha256=x2MhbmoKZyRluesmc01is7ldrUN9c0TX4OAuc1yt6dI,3569
129
+ deepdoctection/utils/develop.py,sha256=4myrqBDypM6tQ2a2Jo3Q20RuE_W2czykpXBwgXPrxNw,3568
130
130
  deepdoctection/utils/env_info.py,sha256=b1WohrfQuoL-BPN0_s8Rjtwzx-WKvCyaX2I4qYl1Emc,19878
131
131
  deepdoctection/utils/error.py,sha256=sIry8F5MZ0yLvKfAwVz90IorKWVvjoRqcC0L8qq8mLk,2480
132
132
  deepdoctection/utils/file_utils.py,sha256=EepfAZVADaqpBdVq2LOJXLFLsMd_oZF_FAKUHOAhiZ0,25246
@@ -142,8 +142,8 @@ deepdoctection/utils/transform.py,sha256=jgeCyQWLN9q79jCGW7jysyKUKcJ1AVMk8OslF-3
142
142
  deepdoctection/utils/types.py,sha256=ti4WdtIJSg3TGK_YPkkoY9PYGMnR2tTX6Xfik8U1pNk,2986
143
143
  deepdoctection/utils/utils.py,sha256=NBUb1qbx8Jm-AvYN1Sdbk0huXhbAKxZ-ZtOcMespsMM,7064
144
144
  deepdoctection/utils/viz.py,sha256=bujRIujvX317rPz4jBrj0yd3WP8wPjDUiI5GUrw9MzQ,27339
145
- deepdoctection-0.43.3.dist-info/licenses/LICENSE,sha256=GQ0rUvuGdrMNEI3iHK5UQx6dIMU1QwAuyXsxUHn5MEQ,11351
146
- deepdoctection-0.43.3.dist-info/METADATA,sha256=tFoZH2VB3ZkiTwzVmd1OBlg1f6eFLrKwwcLyYo6BX1g,13389
147
- deepdoctection-0.43.3.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
148
- deepdoctection-0.43.3.dist-info/top_level.txt,sha256=hs2DdoOL9h4mnHhmO82BT4pz4QATIoOZ20PZmlnxFI8,15
149
- deepdoctection-0.43.3.dist-info/RECORD,,
145
+ deepdoctection-0.43.4.dist-info/licenses/LICENSE,sha256=GQ0rUvuGdrMNEI3iHK5UQx6dIMU1QwAuyXsxUHn5MEQ,11351
146
+ deepdoctection-0.43.4.dist-info/METADATA,sha256=Rq3g8AYO5ClbhHXAHJVh7YkpeP22PBwpB_TN57TbSOI,13389
147
+ deepdoctection-0.43.4.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
148
+ deepdoctection-0.43.4.dist-info/top_level.txt,sha256=hs2DdoOL9h4mnHhmO82BT4pz4QATIoOZ20PZmlnxFI8,15
149
+ deepdoctection-0.43.4.dist-info/RECORD,,